Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *   Copyright (C) International Business Machines Corp., 2000-2004
0004  */
0005 
0006 /*
0007  *  jfs_imap.c: inode allocation map manager
0008  *
0009  * Serialization:
0010  *   Each AG has a simple lock which is used to control the serialization of
0011  *  the AG level lists.  This lock should be taken first whenever an AG
0012  *  level list will be modified or accessed.
0013  *
0014  *   Each IAG is locked by obtaining the buffer for the IAG page.
0015  *
0016  *   There is also an inode lock for the inode map inode.  A read lock needs to
0017  *  be taken whenever an IAG is read from the map or the global level
0018  *  information is read.  A write lock needs to be taken whenever the global
0019  *  level information is modified or an atomic operation needs to be used.
0020  *
0021  *  If more than one IAG is read at one time, the read lock may not
0022  *  be given up until all of the IAG's are read.  Otherwise, a deadlock
0023  *  may occur when trying to obtain the read lock while another thread
0024  *  holding the read lock is waiting on the IAG already being held.
0025  *
0026  *   The control page of the inode map is read into memory by diMount().
0027  *  Thereafter it should only be modified in memory and then it will be
0028  *  written out when the filesystem is unmounted by diUnmount().
0029  */
0030 
0031 #include <linux/fs.h>
0032 #include <linux/buffer_head.h>
0033 #include <linux/pagemap.h>
0034 #include <linux/quotaops.h>
0035 #include <linux/slab.h>
0036 
0037 #include "jfs_incore.h"
0038 #include "jfs_inode.h"
0039 #include "jfs_filsys.h"
0040 #include "jfs_dinode.h"
0041 #include "jfs_dmap.h"
0042 #include "jfs_imap.h"
0043 #include "jfs_metapage.h"
0044 #include "jfs_superblock.h"
0045 #include "jfs_debug.h"
0046 
0047 /*
0048  * imap locks; imap is expanded unparenthesized, so pass a plain identifier
0049  */
0050 /* iag free list lock */
0051 #define IAGFREE_LOCK_INIT(imap)     mutex_init(&imap->im_freelock)
0052 #define IAGFREE_LOCK(imap)      mutex_lock(&imap->im_freelock)
0053 #define IAGFREE_UNLOCK(imap)        mutex_unlock(&imap->im_freelock)
0054 
0055 /* per ag iag list locks: one mutex per entry of im_aglock[] */
0056 #define AG_LOCK_INIT(imap,index)    mutex_init(&(imap->im_aglock[index]))
0057 #define AG_LOCK(imap,agno)      mutex_lock(&imap->im_aglock[agno])
0058 #define AG_UNLOCK(imap,agno)        mutex_unlock(&imap->im_aglock[agno])
0059 
0060 /*
0061  * forward references: file-local (static) helpers declared ahead of their use
0062  */
0063 static int diAllocAG(struct inomap *, int, bool, struct inode *);
0064 static int diAllocAny(struct inomap *, int, bool, struct inode *);
0065 static int diAllocBit(struct inomap *, struct iag *, int);
0066 static int diAllocExt(struct inomap *, int, struct inode *);
0067 static int diAllocIno(struct inomap *, int, struct inode *);
0068 static int diFindFree(u32, int);
0069 static int diNewExt(struct inomap *, struct iag *, int);
0070 static int diNewIAG(struct inomap *, int *, int, struct metapage **);
0071 static void duplicateIXtree(struct super_block *, s64, int, s64 *);
0072 
0073 static int diIAGRead(struct inomap * imap, int, struct metapage **);
0074 static int copy_from_dinode(struct dinode *, struct inode *);
0075 static void copy_to_dinode(struct dinode *, struct inode *);
0076 
0077 /*
0078  * NAME:    diMount()
0079  *
0080  * FUNCTION:    set up the incore inode map control structure for
0081  *      a fileset or aggregate.
0082  *
0083  *      the inode map's control page (dinomap) is read from
0084  *      disk and copied, in cpu byte order, into a new inomap.
0085  *
0086  * PARAMETERS:
0087  *  ipimap  - pointer to inode map inode for the aggregate or fileset.
0088  *
0089  * RETURN VALUES:
0090  *  0   - success
0091  *  -ENOMEM - the inomap could not be allocated.
0092  *  -EIO    - the control page could not be read.
0093  */
0094 int diMount(struct inode *ipimap)
0095 {
0096     struct dinomap_disk *disk;
0097     struct metapage *ctl_mp;
0098     struct inomap *imap;
0099     int index;
0100 
0101     /* the incore control structure; released again by diUnmount() */
0102     imap = kmalloc(sizeof(*imap), GFP_KERNEL);
0103     if (!imap)
0104         return -ENOMEM;
0105 
0106     /* bring in the on-disk control page of the inode map */
0107     ctl_mp = read_metapage(ipimap,
0108                    IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
0109                    PSIZE, 0);
0110     if (!ctl_mp) {
0111         kfree(imap);
0112         return -EIO;
0113     }
0114     disk = (struct dinomap_disk *) ctl_mp->data;
0115 
0116     /*
0117      * map-wide fields: every on-disk value is little endian and is
0118      * converted to cpu byte order on the way in
0119      */
0120     imap->im_freeiag = le32_to_cpu(disk->in_freeiag);
0121     imap->im_nextiag = le32_to_cpu(disk->in_nextiag);
0122     atomic_set(&imap->im_numinos, le32_to_cpu(disk->in_numinos));
0123     atomic_set(&imap->im_numfree, le32_to_cpu(disk->in_numfree));
0124     imap->im_nbperiext = le32_to_cpu(disk->in_nbperiext);
0125     imap->im_l2nbperiext = le32_to_cpu(disk->in_l2nbperiext);
0126 
0127     /* per allocation group control information */
0128     for (index = 0; index < MAXAG; index++) {
0129         imap->im_agctl[index].inofree =
0130             le32_to_cpu(disk->in_agctl[index].inofree);
0131         imap->im_agctl[index].extfree =
0132             le32_to_cpu(disk->in_agctl[index].extfree);
0133         imap->im_agctl[index].numinos =
0134             le32_to_cpu(disk->in_agctl[index].numinos);
0135         imap->im_agctl[index].numfree =
0136             le32_to_cpu(disk->in_agctl[index].numfree);
0137     }
0138 
0139     /* everything needed has been copied: drop the control page */
0140     release_metapage(ctl_mp);
0141 
0142     /*
0143      * set up the locks of the map: one for the iag free list
0144      * and one for each allocation group list
0145      */
0146     IAGFREE_LOCK_INIT(imap);
0147 
0148     for (index = 0; index < MAXAG; index++)
0149         AG_LOCK_INIT(imap, index);
0150 
0151     /*
0152      * finally tie the inode map inode and its control structure
0153      * together so that each can be reached from the other
0154      */
0155     imap->im_ipimap = ipimap;
0156     JFS_IP(ipimap)->i_imap = imap;
0157 
0158     return 0;
0159 }
0160 
0161 
0162 /*
0163  * NAME:    diUnmount()
0164  *
0165  * FUNCTION:    write to disk the incore inode map control structures for
0166  *      a fileset or aggregate at unmount time and free them.
0167  *
0168  * PARAMETERS:
0169  *  ipimap  - pointer to inode map inode for the aggregate or fileset.
0170  *  mounterror - when non-zero the map is not written back (diSync()
0171  *      is skipped); the same holds if isReadOnly(ipimap) is true.
0172  *
0173  * RETURN VALUES:
0174  *  0   - always; the result of diSync() is not propagated.
0175  */
0176 int diUnmount(struct inode *ipimap, int mounterror)
0177 {
0178     struct inomap *imap = JFS_IP(ipimap)->i_imap;
0179 
0180     /*
0181      * update the on-disk inode map control structure
0182      */
0183     if (!(mounterror || isReadOnly(ipimap)))
0184         diSync(ipimap);
0185 
0186     /*
0187      * Invalidate the page cache buffers
0188      */
0189     truncate_inode_pages(ipimap->i_mapping, 0);
0190 
0191     /*
0192      * free in-memory control structure, and drop the inode's pointer
0193      * to it so that no stale reference can be used or freed again
0194      */
0195     kfree(imap);
0196     JFS_IP(ipimap)->i_imap = NULL;
0197     return (0);
0198 }
0199 
0200 
0201 /*
0202  *  diSync(): write the incore imap control data and the imap's dirty pages
0203  */
0204 int diSync(struct inode *ipimap)
0205 {
0206     struct inomap *map = JFS_IP(ipimap)->i_imap;
0207     struct dinomap_disk *disk;
0208     struct metapage *ctl_mp;
0209     int ag;
0210 
0211     /*
0212      * write imap global control page
0213      */
0214     /* get the metapage of the on-disk inode map control structure */
0215     ctl_mp = get_metapage(ipimap,
0216                   IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
0217                   PSIZE, 0);
0218     if (!ctl_mp) {
0219         jfs_err("diSync: get_metapage failed!");
0220         return -EIO;
0221     }
0222     disk = (struct dinomap_disk *) ctl_mp->data;
0223 
0224     /* map-wide fields, stored little endian on disk */
0225     disk->in_freeiag = cpu_to_le32(map->im_freeiag);
0226     disk->in_nextiag = cpu_to_le32(map->im_nextiag);
0227     disk->in_numinos = cpu_to_le32(atomic_read(&map->im_numinos));
0228     disk->in_numfree = cpu_to_le32(atomic_read(&map->im_numfree));
0229     disk->in_nbperiext = cpu_to_le32(map->im_nbperiext);
0230     disk->in_l2nbperiext = cpu_to_le32(map->im_l2nbperiext);
0231 
0232     /* per allocation group control information */
0233     for (ag = 0; ag < MAXAG; ag++) {
0234         disk->in_agctl[ag].inofree =
0235             cpu_to_le32(map->im_agctl[ag].inofree);
0236         disk->in_agctl[ag].extfree =
0237             cpu_to_le32(map->im_agctl[ag].extfree);
0238         disk->in_agctl[ag].numinos =
0239             cpu_to_le32(map->im_agctl[ag].numinos);
0240         disk->in_agctl[ag].numfree =
0241             cpu_to_le32(map->im_agctl[ag].numfree);
0242     }
0243 
0244     /* write out the control structure */
0245     write_metapage(ctl_mp);
0246 
0247     /* write out dirty pages of imap, then the imap inode itself */
0248     filemap_write_and_wait(ipimap->i_mapping);
0249 
0250     diWriteSpecial(ipimap, 0);
0251 
0252     return 0;
0253 }
0254 
0255 
0256 /*
0257  * NAME:    diRead()
0258  *
0259  * FUNCTION:    initialize an incore inode from disk.
0260  *
0261  *      on entry, the incore inode must already carry the number
0262  *      of the disk inode it stands for (ip->i_ino).
0263  *
0264  *      the iag covering that inode number is read under the
0265  *      inode map read lock to find the inode extent that holds
0266  *      the disk inode.  the extent must be backed and have the
0267  *      length recorded in the inode map, otherwise the inode
0268  *      number is treated as stale.
0269  *
0270  *      the page of the extent containing the disk inode is then
0271  *      read (allowing for extents that do not start on a page
0272  *      boundary) and the disk inode is copied to the incore
0273  *      inode.  a disk inode whose number does not match is an
0274  *      error; one with a zero link count is stale.
0275  *
0276  *      once that page has been read, the inode's agstart is set
0277  *      from the iag and its active_ag is reset to -1.
0278  *
0279  * PARAMETERS:
0280  *  ip  -  pointer to incore inode to be initialized from disk.
0281  *
0282  * RETURN VALUES:
0283  *  0   - success
0284  *  -EIO    - i/o error, or disk inode number mismatch.
0285  *  -ESTALE - inode extent not backed/invalid, or link count is 0.
0286  *  other   - error returned by diIAGRead() or copy_from_dinode().
0287  *
0288  */
0289 int diRead(struct inode *ip)
0290 {
0291     struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
0292     struct inode *ipimap = sbi->ipimap;
0293     struct inomap *imap;
0294     struct metapage *mp;
0295     struct iag *iagp;
0296     struct dinode *dp;
0297     pxd_t *ixpxd;
0298     unsigned long pageno;
0299     s64 blkno, agstart;
0300     int iagno, ino, extno;
0301     int block_offset, inodes_left, rel_inode;
0302     int rc;
0303 
0304     jfs_info("diRead: ino = %ld", ip->i_ino);
0305 
0306     JFS_IP(ip)->ipimap = ipimap;
0307     imap = JFS_IP(ipimap)->i_imap;
0308 
0309     /* position of the inode: which iag, and which extent inside it */
0310     iagno = INOTOIAG(ip->i_ino);
0311     ino = ip->i_ino & (INOSPERIAG - 1);
0312     extno = ino >> L2INOSPEREXT;
0313 
0314     /* read the iag; the map read lock is held just across the read */
0315     IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
0316     rc = diIAGRead(imap, iagno, &mp);
0317     IREAD_UNLOCK(ipimap);
0318     if (rc) {
0319         jfs_err("diRead: diIAGRead returned %d", rc);
0320         return rc;
0321     }
0322     iagp = (struct iag *) mp->data;
0323     ixpxd = &iagp->inoext[extno];
0324 
0325     /* the extent must be backed and of the size the map expects */
0326     if (lengthPXD(ixpxd) != imap->im_nbperiext ||
0327         addressPXD(ixpxd) == 0) {
0328         release_metapage(mp);
0329         return -ESTALE;
0330     }
0331 
0332     /*
0333      * disk block number of the page within the inode extent that
0334      * holds the disk inode, and the ag start recorded in the iag
0335      */
0336     blkno = INOPBLK(ixpxd, ino, sbi->l2nbperpage);
0337     agstart = le64_to_cpu(iagp->agstart);
0338 
0339     /* nothing more is needed from the iag */
0340     release_metapage(mp);
0341 
0342     rel_inode = ino & (INOSPERPAGE - 1);
0343     pageno = blkno >> sbi->l2nbperpage;
0344     block_offset = (u32) blkno & (sbi->nbperpage - 1);
0345 
0346     if (block_offset) {
0347         /*
0348          * OS/2 didn't always align inode extents on page boundaries
0349          */
0350         inodes_left =
0351             (sbi->nbperpage - block_offset) << sbi->l2niperblk;
0352 
0353         if (rel_inode >= inodes_left) {
0354             /* the inode lives in the following page */
0355             pageno += 1;
0356             rel_inode -= inodes_left;
0357         } else {
0358             rel_inode += block_offset << sbi->l2niperblk;
0359         }
0360     }
0361 
0362     /* read the page of disk inode */
0363     mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
0364     if (!mp) {
0365         jfs_err("diRead: read_metapage failed");
0366         return -EIO;
0367     }
0368 
0369     /* locate the disk inode requested */
0370     dp = (struct dinode *) mp->data + rel_inode;
0371 
0372     if (ip->i_ino != le32_to_cpu(dp->di_number)) {
0373         jfs_error(ip->i_sb, "i_ino != di_number\n");
0374         rc = -EIO;
0375     } else if (le32_to_cpu(dp->di_nlink) == 0) {
0376         rc = -ESTALE;
0377     } else {
0378         /* copy the disk inode to the in-memory inode */
0379         rc = copy_from_dinode(dp, ip);
0380     }
0381 
0382     release_metapage(mp);
0383 
0384     /* set the ag for the inode */
0385     JFS_IP(ip)->agstart = agstart;
0386     JFS_IP(ip)->active_ag = -1;
0387 
0388     return rc;
0389 }
0390 
0391 
0392 /*
0393  * NAME:    diReadSpecial()
0394  *
0395  * FUNCTION:    initialize a 'special' inode from disk.
0396  *
0397  *      this routine handles aggregate level inodes.  The
0398  *      inode cache cannot differentiate between the
0399  *      aggregate inodes and the filesystem inodes, so we
0400  *      handle these here.  The aggregate inode map is not
0401  *      consulted: the disk inode is read straight from its
0402  *      fixed slot in the primary or secondary aggregate inode
0403  *      table, which also works before that map is initialized.
0404  *
0405  * PARAMETERS:
0406  *  sb - filesystem superblock
0407  *  inum - aggregate inode number
0408  *  secondary - 1 if secondary aggregate inode table
0409  *
0410  * RETURN VALUES:
0411  *  new inode   - success
0412  *  NULL        - inode allocation, i/o or inode copy error.
0413  */
0414 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
0415 {
0416     struct jfs_sb_info *sbi = JFS_SBI(sb);
0417     struct metapage *mp;
0418     struct dinode *dp;
0419     struct inode *ip;
0420     uint address;
0421 
0422     ip = new_inode(sb);
0423     if (!ip) {
0424         jfs_err("diReadSpecial: new_inode returned NULL!");
0425         return NULL;
0426     }
0427 
0428     /* pick the inode table (page address) and the matching inode map */
0429     if (!secondary) {
0430         address = AITBL_OFF >> L2PSIZE;
0431         JFS_IP(ip)->ipimap = sbi->ipaimap;
0432     } else {
0433         address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
0434         JFS_IP(ip)->ipimap = sbi->ipaimap2;
0435     }
0436 
0437     ASSERT(inum < INOSPEREXT);
0438 
0439     ip->i_ino = inum;
0440 
0441     address += inum >> 3;   /* 8 inodes per 4K page */
0442 
0443     /* read the page of fixed disk inode (AIT) in raw mode */
0444     mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
0445     if (!mp) {
0446         /* Don't want iput() deleting it */
0447         set_nlink(ip, 1);
0448         iput(ip);
0449         return NULL;
0450     }
0451 
0452     /* the disk inode of interest; 8 inodes per 4K page */
0453     dp = (struct dinode *) mp->data + inum % 8;
0454 
0455     /* copy on-disk inode to in-memory inode; a bad copy yields NULL */
0456     if (copy_from_dinode(dp, ip) != 0) {
0457         /* Don't want iput() deleting it */
0458         set_nlink(ip, 1);
0459         iput(ip);
0460         release_metapage(mp);
0461         return NULL;
0462     }
0463 
0464     /* use the metapage address space operations, allocating GFP_NOFS */
0465     ip->i_mapping->a_ops = &jfs_metapage_aops;
0466     mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
0467 
0468     /* Allocations to metadata inodes should not affect quotas */
0469     ip->i_flags |= S_NOQUOTA;
0470 
0471     /* FILESYSTEM_I of the primary table supplies gengen and inostamp */
0472     if (inum == FILESYSTEM_I && JFS_IP(ip)->ipimap == sbi->ipaimap) {
0473         sbi->gengen = le32_to_cpu(dp->di_gengen);
0474         sbi->inostamp = le32_to_cpu(dp->di_inostamp);
0475     }
0476 
0477     /* dp is not used past this point: release the page */
0478     release_metapage(mp);
0479 
0480     inode_fake_hash(ip);
0481     return ip;
0482 }
0483 
0484 /*
0485  * NAME:    diWriteSpecial()
0486  *
0487  * FUNCTION:    Write the special inode to its fixed slot on disk
0488  *
0489  * PARAMETERS:
0490  *  ip - special inode
0491  *  secondary - 1 if secondary aggregate inode table
0492  *
0493  * RETURN VALUES: none (a failed page read is only logged)
0494  */
0495 
0496 void diWriteSpecial(struct inode *ip, int secondary)
0497 {
0498     struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
0499     ino_t inum = ip->i_ino;
0500     struct metapage *mp;
0501     struct dinode *dp;
0502     uint address;
0503 
0504     /* page address of the inode table to write to */
0505     address = secondary ? addressPXD(&sbi->ait2) >> sbi->l2nbperpage :
0506                   AITBL_OFF >> L2PSIZE;
0507 
0508     ASSERT(inum < INOSPEREXT);
0509 
0510     address += inum >> 3;   /* 8 inodes per 4K page */
0511 
0512     /* read the page of fixed disk inode (AIT) in raw mode */
0513     mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
0514     if (!mp) {
0515         jfs_err("diWriteSpecial: failed to read aggregate inode extent!");
0516         return;
0517     }
0518 
0519     /* get the pointer to the disk inode of interest */
0520     dp = (struct dinode *) mp->data;
0521     dp += inum % 8;     /* 8 inodes per 4K page */
0522 
0523     /* copy in-memory inode, and its xtree root, to on-disk inode */
0524     copy_to_dinode(dp, ip);
0525     memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
0526 
0527     /* the FILESYSTEM_I inode also records sbi->gengen */
0528     if (inum == FILESYSTEM_I)
0529         dp->di_gengen = cpu_to_le32(sbi->gengen);
0530 
0531     /* write the page */
0532     write_metapage(mp);
0533 }
0534 
0535 /* diFreeSpecial(): flush and drop a special inode's pages, then iput() it */
0536 void diFreeSpecial(struct inode *ip)
0537 {
0538     struct address_space *mapping;
0539 
0540     if (!ip) {
0541         jfs_err("diFreeSpecial called with NULL ip!");
0542         return;
0543     }
0544 
0545     mapping = ip->i_mapping;
0546     filemap_write_and_wait(mapping);
0547     truncate_inode_pages(mapping, 0);
0548     iput(ip);
0549 }
0550 
0551 
0552 
0553 /*
0554  * NAME:    diWrite()
0555  *
0556  * FUNCTION:    write the on-disk inode portion of the in-memory inode
0557  *      to its corresponding on-disk inode.
0558  *
0559  *      on entry, the specified incore inode should itself
0560  *      specify the disk inode number corresponding to the
0561  *      incore inode (i.e. ip->i_ino should be initialized).
0562  *
0563  *      the inode contains the inode extent address for the disk
0564  *      inode.  with the inode extent address in hand, the
0565  *      page of the extent that contains the disk inode is
0566  *      read and the disk inode portion of the incore inode
0567  *      is copied to the disk inode.
0568  *
0569  * PARAMETERS:
0570  *  tid -  transaction id
0571  *  ip  -  pointer to incore inode to be written to the inode extent.
0572  *
0573  * RETURN VALUES:
0574  *  0   - success
0575  *  -EIO    - i/o error, or the inode's extent descriptor (ixpxd) is invalid.
0576  */
0577 int diWrite(tid_t tid, struct inode *ip)
0578 {
0579     struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
0580     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
0581     int rc = 0;
0582     s32 ino;
0583     struct dinode *dp;
0584     s64 blkno;
0585     int block_offset;
0586     int inodes_left;
0587     struct metapage *mp;
0588     unsigned long pageno;
0589     int rel_inode;
0590     int dioffset;
0591     struct inode *ipimap;
0592     uint type;
0593     lid_t lid;
0594     struct tlock *ditlck, *tlck;
0595     struct linelock *dilinelock, *ilinelock;
0596     struct lv *lv;
0597     int n;
0598 
0599     ipimap = jfs_ip->ipimap;
0600 
0601     ino = ip->i_ino & (INOSPERIAG - 1);
0602 
0603     if (!addressPXD(&(jfs_ip->ixpxd)) ||
0604         (lengthPXD(&(jfs_ip->ixpxd)) !=
0605          JFS_IP(ipimap)->i_imap->im_nbperiext)) {
0606         jfs_error(ip->i_sb, "ixpxd invalid\n");
0607         return -EIO;
0608     }
0609 
0610     /*
0611      * read the page of disk inode containing the specified inode:
0612      */
0613     /* compute the block address of the page */
0614     blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
0615 
0616     rel_inode = (ino & (INOSPERPAGE - 1));
0617     pageno = blkno >> sbi->l2nbperpage;
0618 
0619     if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
0620         /*
0621          * OS/2 didn't always align inode extents on page boundaries
0622          */
0623         inodes_left =
0624             (sbi->nbperpage - block_offset) << sbi->l2niperblk;
0625 
0626         if (rel_inode < inodes_left)
0627             rel_inode += block_offset << sbi->l2niperblk;
0628         else {
0629             pageno += 1;
0630             rel_inode -= inodes_left;
0631         }
0632     }
0633     /* read the page of disk inode (read again whenever txLock() returns NULL) */
0634       retry:
0635     mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
0636     if (!mp)
0637         return -EIO;
0638 
0639     /* get the pointer to the disk inode */
0640     dp = (struct dinode *) mp->data;
0641     dp += rel_inode;
0642     /* NOTE(review): dioffset is derived from ino, not the adjusted rel_inode - confirm */
0643     dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
0644 
0645     /*
0646      * acquire transaction lock on the on-disk inode;
0647      * N.B. tlock is acquired on ipimap not ip;
0648      */
0649     if ((ditlck =
0650          txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
0651         goto retry;
0652     dilinelock = (struct linelock *) & ditlck->lock;
0653 
0654     /*
0655      * copy btree root from in-memory inode to on-disk inode
0656      *
0657      * (tlock is taken from inline B+-tree root in in-memory
0658      * inode when the B+-tree root is updated, which is pointed
0659      * by jfs_ip->blid as well as being on tx tlock list)
0660      *
0661      * further processing of btree root is based on the copy
0662      * in in-memory inode, where txLog() will log from, and,
0663      * for xtree root, txUpdateMap() will update map and reset
0664      * XAD_NEW bit;
0665      */
0666 
0667     if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
0668         /*
0669          * This is the special xtree inside the directory for storing
0670          * the directory table
0671          */
0672         xtpage_t *p, *xp;
0673         xad_t *xad;
0674 
0675         jfs_ip->xtlid = 0;
0676         tlck = lid_to_tlock(lid);
0677         assert(tlck->type & tlckXTREE);
0678         tlck->type |= tlckBTROOT;
0679         tlck->mp = mp;
0680         ilinelock = (struct linelock *) & tlck->lock;
0681 
0682         /*
0683          * copy xtree root from inode to dinode:
0684          */
0685         p = &jfs_ip->i_xtroot;
0686         xp = (xtpage_t *) &dp->di_dirtable;
0687         lv = ilinelock->lv;
0688         for (n = 0; n < ilinelock->index; n++, lv++) {
0689             memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
0690                    lv->length << L2XTSLOTSIZE);
0691         }
0692 
0693         /* reset on-disk (metadata page) xtree XAD_NEW and XAD_EXTENDED bits */
0694         xad = &xp->xad[XTENTRYSTART];
0695         for (n = XTENTRYSTART;
0696              n < le16_to_cpu(xp->header.nextindex); n++, xad++)
0697             if (xad->flag & (XAD_NEW | XAD_EXTENDED))
0698                 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
0699     }
0700 
0701     if ((lid = jfs_ip->blid) == 0)
0702         goto inlineData;
0703     jfs_ip->blid = 0;
0704 
0705     tlck = lid_to_tlock(lid);
0706     type = tlck->type;
0707     tlck->type |= tlckBTROOT;
0708     tlck->mp = mp;
0709     ilinelock = (struct linelock *) & tlck->lock;
0710 
0711     /*
0712      *  regular file: 16 byte (XAD slot) granularity
0713      */
0714     if (type & tlckXTREE) {
0715         xtpage_t *p, *xp;
0716         xad_t *xad;
0717 
0718         /*
0719          * copy xtree root from inode to dinode:
0720          */
0721         p = &jfs_ip->i_xtroot;
0722         xp = &dp->di_xtroot;
0723         lv = ilinelock->lv;
0724         for (n = 0; n < ilinelock->index; n++, lv++) {
0725             memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
0726                    lv->length << L2XTSLOTSIZE);
0727         }
0728 
0729         /* reset on-disk (metadata page) xtree XAD_NEW and XAD_EXTENDED bits */
0730         xad = &xp->xad[XTENTRYSTART];
0731         for (n = XTENTRYSTART;
0732              n < le16_to_cpu(xp->header.nextindex); n++, xad++)
0733             if (xad->flag & (XAD_NEW | XAD_EXTENDED))
0734                 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
0735     }
0736     /*
0737      *  directory: 32 byte (directory entry slot) granularity
0738      */
0739     else if (type & tlckDTREE) {
0740         dtpage_t *p, *xp;
0741 
0742         /*
0743          * copy dtree root from inode to dinode:
0744          */
0745         p = (dtpage_t *) &jfs_ip->i_dtroot;
0746         xp = (dtpage_t *) & dp->di_dtroot;
0747         lv = ilinelock->lv;
0748         for (n = 0; n < ilinelock->index; n++, lv++) {
0749             memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
0750                    lv->length << L2DTSLOTSIZE);
0751         }
0752     } else {
0753         jfs_err("diWrite: UFO tlock");
0754     }
0755 
0756       inlineData:
0757     /*
0758      * copy inline symlink from in-memory inode to on-disk inode
0759      */
0760     if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
0761         lv = & dilinelock->lv[dilinelock->index];
0762         lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
0763         lv->length = 2;
0764         memcpy(&dp->di_inline_all, jfs_ip->i_inline_all, IDATASIZE);
0765         dilinelock->index++;
0766     }
0767     /*
0768      * copy inline EA data (i_inline_ea) from in-memory inode to
0769      * on-disk inode: 128 byte slot granularity
0770      */
0771     if (test_cflag(COMMIT_Inlineea, ip)) {
0772         lv = & dilinelock->lv[dilinelock->index];
0773         lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
0774         lv->length = 1;
0775         memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
0776         dilinelock->index++;
0777 
0778         clear_cflag(COMMIT_Inlineea, ip);
0779     }
0780 
0781     /*
0782      *  lock/copy inode base: 128 byte slots, 2 if di_dirtable is copied too
0783      */
0784     lv = & dilinelock->lv[dilinelock->index];
0785     lv->offset = dioffset >> L2INODESLOTSIZE;
0786     copy_to_dinode(dp, ip);
0787     if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
0788         lv->length = 2;
0789         memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
0790     } else
0791         lv->length = 1;
0792     dilinelock->index++;
0793 
0794     /* release the buffer holding the updated on-disk inode.
0795      * the buffer will be later written by commit processing.
0796      */
0797     write_metapage(mp);
0798 
0799     return (rc);
0800 }
0801 
0802 
0803 /*
0804  * NAME:    diFree(ip)
0805  *
0806  * FUNCTION:    free a specified inode from the inode working map
0807  *      for a fileset or aggregate.
0808  *
0809  *      if the inode to be freed represents the first (only)
0810  *      free inode within the iag, the iag will be placed on
0811  *      the ag free inode list.
0812  *
0813  *      freeing the inode will cause the inode extent to be
0814  *      freed if the inode is the only allocated inode within
0815  *      the extent.  in this case all the disk resource backing
0816  *      up the inode extent will be freed. in addition, the iag
0817  *      will be placed on the ag extent free list if the extent
0818  *      is the first free extent in the iag.  if freeing the
0819  *      extent also means that no free inodes will exist for
0820  *      the iag, the iag will also be removed from the ag free
0821  *      inode list.
0822  *
0823  *      the iag describing the inode will be freed if the extent
0824  *      is to be freed and it is the only backed extent within
0825  *      the iag.  in this case, the iag will be removed from the
0826  *      ag free extent list and ag free inode list and placed on
0827  *      the inode map's free iag list.
0828  *
0829  *      a careful update approach is used to provide consistency
0830  *      in the face of updates to multiple buffers.  under this
0831  *      approach, all required buffers are obtained before making
0832  *      any updates and are held until all updates are complete.
0833  *
0834  * PARAMETERS:
0835  *  ip  - inode to be freed.
0836  *
0837  * RETURN VALUES:
0838  *  0   - success
0839  *  -EIO    - i/o error.
0840  */
0841 int diFree(struct inode *ip)
0842 {
0843     int rc;
0844     ino_t inum = ip->i_ino;
0845     struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
0846     struct metapage *mp, *amp, *bmp, *cmp, *dmp;
0847     int iagno, ino, extno, bitno, sword, agno;
0848     int back, fwd;
0849     u32 bitmap, mask;
0850     struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
0851     struct inomap *imap = JFS_IP(ipimap)->i_imap;
0852     pxd_t freepxd;
0853     tid_t tid;
0854     struct inode *iplist[3];
0855     struct tlock *tlck;
0856     struct pxd_lock *pxdlock;
0857 
0858     /*
0859      * This is just to suppress compiler warnings.  The same logic that
0860      * references these variables is used to initialize them.
0861      */
0862     aiagp = biagp = ciagp = diagp = NULL;
0863 
0864     /* get the iag number containing the inode.
0865      */
0866     iagno = INOTOIAG(inum);
0867 
0868     /* make sure that the iag is contained within
0869      * the map.
0870      */
0871     if (iagno >= imap->im_nextiag) {
0872         print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
0873                    imap, 32, 0);
0874         jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n",
0875               (uint) inum, iagno, imap->im_nextiag);
0876         return -EIO;
0877     }
0878 
0879     /* get the allocation group for this ino.
0880      */
0881     agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
0882 
0883     /* Lock the AG specific inode map information
0884      */
0885     AG_LOCK(imap, agno);
0886 
0887     /* Obtain read lock in imap inode.  Don't release it until we have
0888      * read all of the IAG's that we are going to.
0889      */
0890     IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
0891 
0892     /* read the iag.
0893      */
0894     if ((rc = diIAGRead(imap, iagno, &mp))) {
0895         IREAD_UNLOCK(ipimap);
0896         AG_UNLOCK(imap, agno);
0897         return (rc);
0898     }
0899     iagp = (struct iag *) mp->data;
0900 
0901     /* get the inode number and extent number of the inode within
0902      * the iag and the inode number within the extent.
0903      */
0904     ino = inum & (INOSPERIAG - 1);
0905     extno = ino >> L2INOSPEREXT;
0906     bitno = ino & (INOSPEREXT - 1);
0907     mask = HIGHORDER >> bitno;
0908 
0909     if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
0910         jfs_error(ip->i_sb, "wmap shows inode already free\n");
0911     }
0912 
0913     if (!addressPXD(&iagp->inoext[extno])) {
0914         release_metapage(mp);
0915         IREAD_UNLOCK(ipimap);
0916         AG_UNLOCK(imap, agno);
0917         jfs_error(ip->i_sb, "invalid inoext\n");
0918         return -EIO;
0919     }
0920 
0921     /* compute the bitmap for the extent reflecting the freed inode.
0922      */
0923     bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
0924 
0925     if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
0926         release_metapage(mp);
0927         IREAD_UNLOCK(ipimap);
0928         AG_UNLOCK(imap, agno);
0929         jfs_error(ip->i_sb, "numfree > numinos\n");
0930         return -EIO;
0931     }
0932     /*
0933      *  inode extent still has some inodes or below low water mark:
0934      *  keep the inode extent;
0935      */
0936     if (bitmap ||
0937         imap->im_agctl[agno].numfree < 96 ||
0938         (imap->im_agctl[agno].numfree < 288 &&
0939          (((imap->im_agctl[agno].numfree * 100) /
0940            imap->im_agctl[agno].numinos) <= 25))) {
0941         /* if the iag currently has no free inodes (i.e.,
0942          * the inode being freed is the first free inode of iag),
0943          * insert the iag at head of the inode free list for the ag.
0944          */
0945         if (iagp->nfreeinos == 0) {
0946             /* check if there are any iags on the ag inode
0947              * free list.  if so, read the first one so that
0948              * we can link the current iag onto the list at
0949              * the head.
0950              */
0951             if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
0952                 /* read the iag that currently is the head
0953                  * of the list.
0954                  */
0955                 if ((rc = diIAGRead(imap, fwd, &amp))) {
0956                     IREAD_UNLOCK(ipimap);
0957                     AG_UNLOCK(imap, agno);
0958                     release_metapage(mp);
0959                     return (rc);
0960                 }
0961                 aiagp = (struct iag *) amp->data;
0962 
0963                 /* make current head point back to the iag.
0964                  */
0965                 aiagp->inofreeback = cpu_to_le32(iagno);
0966 
0967                 write_metapage(amp);
0968             }
0969 
0970             /* iag points forward to current head and iag
0971              * becomes the new head of the list.
0972              */
0973             iagp->inofreefwd =
0974                 cpu_to_le32(imap->im_agctl[agno].inofree);
0975             iagp->inofreeback = cpu_to_le32(-1);
0976             imap->im_agctl[agno].inofree = iagno;
0977         }
0978         IREAD_UNLOCK(ipimap);
0979 
0980         /* update the free inode summary map for the extent if
0981          * freeing the inode means the extent will now have free
0982          * inodes (i.e., the inode being freed is the first free
0983          * inode of extent),
0984          */
0985         if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
0986             sword = extno >> L2EXTSPERSUM;
0987             bitno = extno & (EXTSPERSUM - 1);
0988             iagp->inosmap[sword] &=
0989                 cpu_to_le32(~(HIGHORDER >> bitno));
0990         }
0991 
0992         /* update the bitmap.
0993          */
0994         iagp->wmap[extno] = cpu_to_le32(bitmap);
0995 
0996         /* update the free inode counts at the iag, ag and
0997          * map level.
0998          */
0999         le32_add_cpu(&iagp->nfreeinos, 1);
1000         imap->im_agctl[agno].numfree += 1;
1001         atomic_inc(&imap->im_numfree);
1002 
1003         /* release the AG inode map lock
1004          */
1005         AG_UNLOCK(imap, agno);
1006 
1007         /* write the iag */
1008         write_metapage(mp);
1009 
1010         return (0);
1011     }
1012 
1013 
1014     /*
1015      *  inode extent has become free and above low water mark:
1016      *  free the inode extent;
1017      */
1018 
1019     /*
1020      *  prepare to update iag list(s) (careful update step 1)
1021      */
1022     amp = bmp = cmp = dmp = NULL;
1023     fwd = back = -1;
1024 
1025     /* check if the iag currently has no free extents.  if so,
1026      * it will be placed on the head of the ag extent free list.
1027      */
1028     if (iagp->nfreeexts == 0) {
1029         /* check if the ag extent free list has any iags.
1030          * if so, read the iag at the head of the list now.
1031          * this (head) iag will be updated later to reflect
1032          * the addition of the current iag at the head of
1033          * the list.
1034          */
1035         if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
1036             if ((rc = diIAGRead(imap, fwd, &amp)))
1037                 goto error_out;
1038             aiagp = (struct iag *) amp->data;
1039         }
1040     } else {
1041         /* iag has free extents. check if the addition of a free
1042          * extent will cause all extents to be free within this
1043          * iag.  if so, the iag will be removed from the ag extent
1044          * free list and placed on the inode map's free iag list.
1045          */
1046         if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1047             /* in preparation for removing the iag from the
1048              * ag extent free list, read the iags preceding
1049              * and following the iag on the ag extent free
1050              * list.
1051              */
1052             if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
1053                 if ((rc = diIAGRead(imap, fwd, &amp)))
1054                     goto error_out;
1055                 aiagp = (struct iag *) amp->data;
1056             }
1057 
1058             if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
1059                 if ((rc = diIAGRead(imap, back, &bmp)))
1060                     goto error_out;
1061                 biagp = (struct iag *) bmp->data;
1062             }
1063         }
1064     }
1065 
1066     /* remove the iag from the ag inode free list if freeing
1067      * this extent cause the iag to have no free inodes.
1068      */
1069     if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1070         int inofreeback = le32_to_cpu(iagp->inofreeback);
1071         int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1072 
1073         /* in preparation for removing the iag from the
1074          * ag inode free list, read the iags preceding
1075          * and following the iag on the ag inode free
1076          * list.  before reading these iags, we must make
1077          * sure that we already don't have them in hand
1078          * from up above, since re-reading an iag (buffer)
1079          * we are currently holding would cause a deadlock.
1080          */
1081         if (inofreefwd >= 0) {
1082 
1083             if (inofreefwd == fwd)
1084                 ciagp = (struct iag *) amp->data;
1085             else if (inofreefwd == back)
1086                 ciagp = (struct iag *) bmp->data;
1087             else {
1088                 if ((rc =
1089                      diIAGRead(imap, inofreefwd, &cmp)))
1090                     goto error_out;
1091                 ciagp = (struct iag *) cmp->data;
1092             }
1093             assert(ciagp != NULL);
1094         }
1095 
1096         if (inofreeback >= 0) {
1097             if (inofreeback == fwd)
1098                 diagp = (struct iag *) amp->data;
1099             else if (inofreeback == back)
1100                 diagp = (struct iag *) bmp->data;
1101             else {
1102                 if ((rc =
1103                      diIAGRead(imap, inofreeback, &dmp)))
1104                     goto error_out;
1105                 diagp = (struct iag *) dmp->data;
1106             }
1107             assert(diagp != NULL);
1108         }
1109     }
1110 
1111     IREAD_UNLOCK(ipimap);
1112 
1113     /*
1114      * invalidate any page of the inode extent freed from buffer cache;
1115      */
1116     freepxd = iagp->inoext[extno];
1117     invalidate_pxd_metapages(ip, freepxd);
1118 
1119     /*
1120      *  update iag list(s) (careful update step 2)
1121      */
1122     /* add the iag to the ag extent free list if this is the
1123      * first free extent for the iag.
1124      */
1125     if (iagp->nfreeexts == 0) {
1126         if (fwd >= 0)
1127             aiagp->extfreeback = cpu_to_le32(iagno);
1128 
1129         iagp->extfreefwd =
1130             cpu_to_le32(imap->im_agctl[agno].extfree);
1131         iagp->extfreeback = cpu_to_le32(-1);
1132         imap->im_agctl[agno].extfree = iagno;
1133     } else {
1134         /* remove the iag from the ag extent list if all extents
1135          * are now free and place it on the inode map iag free list.
1136          */
1137         if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1138             if (fwd >= 0)
1139                 aiagp->extfreeback = iagp->extfreeback;
1140 
1141             if (back >= 0)
1142                 biagp->extfreefwd = iagp->extfreefwd;
1143             else
1144                 imap->im_agctl[agno].extfree =
1145                     le32_to_cpu(iagp->extfreefwd);
1146 
1147             iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
1148 
1149             IAGFREE_LOCK(imap);
1150             iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1151             imap->im_freeiag = iagno;
1152             IAGFREE_UNLOCK(imap);
1153         }
1154     }
1155 
1156     /* remove the iag from the ag inode free list if freeing
1157      * this extent causes the iag to have no free inodes.
1158      */
1159     if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1160         if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
1161             ciagp->inofreeback = iagp->inofreeback;
1162 
1163         if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
1164             diagp->inofreefwd = iagp->inofreefwd;
1165         else
1166             imap->im_agctl[agno].inofree =
1167                 le32_to_cpu(iagp->inofreefwd);
1168 
1169         iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1170     }
1171 
1172     /* update the inode extent address and working map
1173      * to reflect the free extent.
1174      * the permanent map should have been updated already
1175      * for the inode being freed.
1176      */
1177     if (iagp->pmap[extno] != 0) {
1178         jfs_error(ip->i_sb, "the pmap does not show inode free\n");
1179     }
1180     iagp->wmap[extno] = 0;
1181     PXDlength(&iagp->inoext[extno], 0);
1182     PXDaddress(&iagp->inoext[extno], 0);
1183 
1184     /* update the free extent and free inode summary maps
1185      * to reflect the freed extent.
1186      * the inode summary map is marked to indicate no inodes
1187      * available for the freed extent.
1188      */
1189     sword = extno >> L2EXTSPERSUM;
1190     bitno = extno & (EXTSPERSUM - 1);
1191     mask = HIGHORDER >> bitno;
1192     iagp->inosmap[sword] |= cpu_to_le32(mask);
1193     iagp->extsmap[sword] &= cpu_to_le32(~mask);
1194 
1195     /* update the number of free inodes and number of free extents
1196      * for the iag.
1197      */
1198     le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1199     le32_add_cpu(&iagp->nfreeexts, 1);
1200 
1201     /* update the number of free inodes and backed inodes
1202      * at the ag and inode map level.
1203      */
1204     imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
1205     imap->im_agctl[agno].numinos -= INOSPEREXT;
1206     atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
1207     atomic_sub(INOSPEREXT, &imap->im_numinos);
1208 
1209     if (amp)
1210         write_metapage(amp);
1211     if (bmp)
1212         write_metapage(bmp);
1213     if (cmp)
1214         write_metapage(cmp);
1215     if (dmp)
1216         write_metapage(dmp);
1217 
1218     /*
1219      * start transaction to update block allocation map
1220      * for the inode extent freed;
1221      *
1222      * N.B. AG_LOCK is released and iag will be released below, and
1223      * other thread may allocate inode from/reusing the ixad freed
1224      * BUT with new/different backing inode extent from the extent
1225      * to be freed by the transaction;
1226      */
1227     tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1228     mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1229 
1230     /* acquire tlock of the iag page of the freed ixad
1231      * to force the page NOHOMEOK (even though no data is
1232      * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1233      * for the free of the extent is committed;
1234      * write FREEXTENT|NOREDOPAGE log record
1235      * N.B. linelock is overlaid as freed extent descriptor;
1236      */
1237     tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
1238     pxdlock = (struct pxd_lock *) & tlck->lock;
1239     pxdlock->flag = mlckFREEPXD;
1240     pxdlock->pxd = freepxd;
1241     pxdlock->index = 1;
1242 
1243     write_metapage(mp);
1244 
1245     iplist[0] = ipimap;
1246 
1247     /*
1248      * logredo needs the IAG number and IAG extent index in order
1249      * to ensure that the IMap is consistent.  The least disruptive
1250      * way to pass these values through  to the transaction manager
1251      * is in the iplist array.
1252      *
1253      * It's not pretty, but it works.
1254      */
1255     iplist[1] = (struct inode *) (size_t)iagno;
1256     iplist[2] = (struct inode *) (size_t)extno;
1257 
1258     rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
1259 
1260     txEnd(tid);
1261     mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
1262 
1263     /* unlock the AG inode map information */
1264     AG_UNLOCK(imap, agno);
1265 
1266     return (0);
1267 
1268       error_out:
1269     IREAD_UNLOCK(ipimap);
1270 
1271     if (amp)
1272         release_metapage(amp);
1273     if (bmp)
1274         release_metapage(bmp);
1275     if (cmp)
1276         release_metapage(cmp);
1277     if (dmp)
1278         release_metapage(dmp);
1279 
1280     AG_UNLOCK(imap, agno);
1281 
1282     release_metapage(mp);
1283 
1284     return (rc);
1285 }
1286 
1287 /*
1288  * There are several places in the diAlloc* routines where we initialize
1289  * the inode.
1290  */
1291 static inline void
1292 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1293 {
1294     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1295 
1296     ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1297     jfs_ip->ixpxd = iagp->inoext[extno];
1298     jfs_ip->agstart = le64_to_cpu(iagp->agstart);
1299     jfs_ip->active_ag = -1;
1300 }
1301 
1302 
1303 /*
1304  * NAME:    diAlloc(pip,dir,ip)
1305  *
1306  * FUNCTION:    allocate a disk inode from the inode working map
1307  *      for a fileset or aggregate.
1308  *
1309  * PARAMETERS:
1310  *  pip - pointer to incore inode for the parent inode.
1311  *  dir - 'true' if the new disk inode is for a directory.
1312  *  ip  - pointer to a new inode
1313  *
1314  * RETURN VALUES:
1315  *  0   - success.
1316  *  -ENOSPC - insufficient disk resources.
1317  *  -EIO    - i/o error.
1318  */
1319 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1320 {
1321     int rc, ino, iagno, addext, extno, bitno, sword;
1322     int nwords, rem, i, agno;
1323     u32 mask, inosmap, extsmap;
1324     struct inode *ipimap;
1325     struct metapage *mp;
1326     ino_t inum;
1327     struct iag *iagp;
1328     struct inomap *imap;
1329 
1330     /* get the pointers to the inode map inode and the
1331      * corresponding imap control structure.
1332      */
1333     ipimap = JFS_SBI(pip->i_sb)->ipimap;
1334     imap = JFS_IP(ipimap)->i_imap;
1335     JFS_IP(ip)->ipimap = ipimap;
1336     JFS_IP(ip)->fileset = FILESYSTEM_I;
1337 
1338     /* for a directory, the allocation policy is to start
1339      * at the ag level using the preferred ag.
1340      */
1341     if (dir) {
1342         agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1343         AG_LOCK(imap, agno);
1344         goto tryag;
1345     }
1346 
1347     /* for files, the policy starts off by trying to allocate from
1348      * the same iag containing the parent disk inode:
1349      * try to allocate the new disk inode close to the parent disk
1350      * inode, using parent disk inode number + 1 as the allocation
1351      * hint.  (we use a left-to-right policy to attempt to avoid
1352      * moving backward on the disk.)  compute the hint within the
1353      * file system and the iag.
1354      */
1355 
1356     /* get the ag number of this iag */
1357     agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
1358 
1359     if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1360         /*
1361          * There is an open file actively growing.  We want to
1362          * allocate new inodes from a different ag to avoid
1363          * fragmentation problems.
1364          */
1365         agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1366         AG_LOCK(imap, agno);
1367         goto tryag;
1368     }
1369 
1370     inum = pip->i_ino + 1;
1371     ino = inum & (INOSPERIAG - 1);
1372 
1373     /* back off the hint if it is outside of the iag */
1374     if (ino == 0)
1375         inum = pip->i_ino;
1376 
1377     /* lock the AG inode map information */
1378     AG_LOCK(imap, agno);
1379 
1380     /* Get read lock on imap inode */
1381     IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1382 
1383     /* get the iag number and read the iag */
1384     iagno = INOTOIAG(inum);
1385     if ((rc = diIAGRead(imap, iagno, &mp))) {
1386         IREAD_UNLOCK(ipimap);
1387         AG_UNLOCK(imap, agno);
1388         return (rc);
1389     }
1390     iagp = (struct iag *) mp->data;
1391 
1392     /* determine if new inode extent is allowed to be added to the iag.
1393      * new inode extent can be added to the iag if the ag
1394      * has less than 32 free disk inodes and the iag has free extents.
1395      */
1396     addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1397 
1398     /*
1399      *  try to allocate from the IAG
1400      */
1401     /* check if the inode may be allocated from the iag
1402      * (i.e. the inode has free inodes or new extent can be added).
1403      */
1404     if (iagp->nfreeinos || addext) {
1405         /* determine the extent number of the hint.
1406          */
1407         extno = ino >> L2INOSPEREXT;
1408 
1409         /* check if the extent containing the hint has backed
1410          * inodes.  if so, try to allocate within this extent.
1411          */
1412         if (addressPXD(&iagp->inoext[extno])) {
1413             bitno = ino & (INOSPEREXT - 1);
1414             if ((bitno =
1415                  diFindFree(le32_to_cpu(iagp->wmap[extno]),
1416                     bitno))
1417                 < INOSPEREXT) {
1418                 ino = (extno << L2INOSPEREXT) + bitno;
1419 
1420                 /* a free inode (bit) was found within this
1421                  * extent, so allocate it.
1422                  */
1423                 rc = diAllocBit(imap, iagp, ino);
1424                 IREAD_UNLOCK(ipimap);
1425                 if (rc) {
1426                     assert(rc == -EIO);
1427                 } else {
1428                     /* set the results of the allocation
1429                      * and write the iag.
1430                      */
1431                     diInitInode(ip, iagno, ino, extno,
1432                             iagp);
1433                     mark_metapage_dirty(mp);
1434                 }
1435                 release_metapage(mp);
1436 
1437                 /* free the AG lock and return.
1438                  */
1439                 AG_UNLOCK(imap, agno);
1440                 return (rc);
1441             }
1442 
1443             if (!addext)
1444                 extno =
1445                     (extno ==
1446                      EXTSPERIAG - 1) ? 0 : extno + 1;
1447         }
1448 
1449         /*
1450          * no free inodes within the extent containing the hint.
1451          *
1452          * try to allocate from the backed extents following
1453          * hint or, if appropriate (i.e. addext is true), allocate
1454          * an extent of free inodes at or following the extent
1455          * containing the hint.
1456          *
1457          * the free inode and free extent summary maps are used
1458          * here, so determine the starting summary map position
1459          * and the number of words we'll have to examine.  again,
1460          * the approach is to allocate following the hint, so we
1461          * might have to initially ignore prior bits of the summary
1462          * map that represent extents prior to the extent containing
1463          * the hint and later revisit these bits.
1464          */
1465         bitno = extno & (EXTSPERSUM - 1);
1466         nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
1467         sword = extno >> L2EXTSPERSUM;
1468 
1469         /* mask any prior bits for the starting words of the
1470          * summary map.
1471          */
1472         mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno));
1473         inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
1474         extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
1475 
1476         /* scan the free inode and free extent summary maps for
1477          * free resources.
1478          */
1479         for (i = 0; i < nwords; i++) {
1480             /* check if this word of the free inode summary
1481              * map describes an extent with free inodes.
1482              */
1483             if (~inosmap) {
1484                 /* an extent with free inodes has been
1485                  * found. determine the extent number
1486                  * and the inode number within the extent.
1487                  */
1488                 rem = diFindFree(inosmap, 0);
1489                 extno = (sword << L2EXTSPERSUM) + rem;
1490                 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
1491                          0);
1492                 if (rem >= INOSPEREXT) {
1493                     IREAD_UNLOCK(ipimap);
1494                     release_metapage(mp);
1495                     AG_UNLOCK(imap, agno);
1496                     jfs_error(ip->i_sb,
1497                           "can't find free bit in wmap\n");
1498                     return -EIO;
1499                 }
1500 
1501                 /* determine the inode number within the
1502                  * iag and allocate the inode from the
1503                  * map.
1504                  */
1505                 ino = (extno << L2INOSPEREXT) + rem;
1506                 rc = diAllocBit(imap, iagp, ino);
1507                 IREAD_UNLOCK(ipimap);
1508                 if (rc)
1509                     assert(rc == -EIO);
1510                 else {
1511                     /* set the results of the allocation
1512                      * and write the iag.
1513                      */
1514                     diInitInode(ip, iagno, ino, extno,
1515                             iagp);
1516                     mark_metapage_dirty(mp);
1517                 }
1518                 release_metapage(mp);
1519 
1520                 /* free the AG lock and return.
1521                  */
1522                 AG_UNLOCK(imap, agno);
1523                 return (rc);
1524 
1525             }
1526 
1527             /* check if we may allocate an extent of free
1528              * inodes and whether this word of the free
1529              * extents summary map describes a free extent.
1530              */
1531             if (addext && ~extsmap) {
1532                 /* a free extent has been found.  determine
1533                  * the extent number.
1534                  */
1535                 rem = diFindFree(extsmap, 0);
1536                 extno = (sword << L2EXTSPERSUM) + rem;
1537 
1538                 /* allocate an extent of free inodes.
1539                  */
1540                 if ((rc = diNewExt(imap, iagp, extno))) {
1541                     /* if there is no disk space for a
1542                      * new extent, try to allocate the
1543                      * disk inode from somewhere else.
1544                      */
1545                     if (rc == -ENOSPC)
1546                         break;
1547 
1548                     assert(rc == -EIO);
1549                 } else {
1550                     /* set the results of the allocation
1551                      * and write the iag.
1552                      */
1553                     diInitInode(ip, iagno,
1554                             extno << L2INOSPEREXT,
1555                             extno, iagp);
1556                     mark_metapage_dirty(mp);
1557                 }
1558                 release_metapage(mp);
1559                 /* free the imap inode & the AG lock & return.
1560                  */
1561                 IREAD_UNLOCK(ipimap);
1562                 AG_UNLOCK(imap, agno);
1563                 return (rc);
1564             }
1565 
1566             /* move on to the next set of summary map words.
1567              */
1568             sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
1569             inosmap = le32_to_cpu(iagp->inosmap[sword]);
1570             extsmap = le32_to_cpu(iagp->extsmap[sword]);
1571         }
1572     }
1573     /* unlock imap inode */
1574     IREAD_UNLOCK(ipimap);
1575 
1576     /* nothing doing in this iag, so release it. */
1577     release_metapage(mp);
1578 
1579       tryag:
1580     /*
1581      * try to allocate anywhere within the same AG as the parent inode.
1582      */
1583     rc = diAllocAG(imap, agno, dir, ip);
1584 
1585     AG_UNLOCK(imap, agno);
1586 
1587     if (rc != -ENOSPC)
1588         return (rc);
1589 
1590     /*
1591      * try to allocate in any AG.
1592      */
1593     return (diAllocAny(imap, agno, dir, ip));
1594 }
1595 
1596 
1597 /*
1598  * NAME:    diAllocAG(imap,agno,dir,ip)
1599  *
1600  * FUNCTION:    allocate a disk inode from the allocation group.
1601  *
1602  *      this routine first determines if a new extent of free
1603  *      inodes should be added for the allocation group, with
1604  *      the current request satisfied from this extent. if this
1605  *      is the case, an attempt will be made to do just that.  if
1606  *      this attempt fails or it has been determined that a new
1607  *      extent should not be added, an attempt is made to satisfy
1608  *      the request by allocating an existing (backed) free inode
1609  *      from the allocation group.
1610  *
1611  * PRE CONDITION: Already have the AG lock for this AG.
1612  *
1613  * PARAMETERS:
1614  *  imap    - pointer to inode map control structure.
1615  *  agno    - allocation group to allocate from.
1616  *  dir - 'true' if the new disk inode is for a directory.
1617  *  ip  - pointer to the new inode to be filled in on successful return
1618  *        with the disk inode number allocated, its extent address
1619  *        and the start of the ag.
1620  *
1621  * RETURN VALUES:
1622  *  0   - success.
1623  *  -ENOSPC - insufficient disk resources.
1624  *  -EIO    - i/o error.
1625  */
1626 static int
1627 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1628 {
1629     int rc, addext, numfree, numinos;
1630 
1631     /* get the number of free and the number of backed disk
1632      * inodes currently within the ag.
1633      */
1634     numfree = imap->im_agctl[agno].numfree;
1635     numinos = imap->im_agctl[agno].numinos;
1636 
1637     if (numfree > numinos) {
1638         jfs_error(ip->i_sb, "numfree > numinos\n");
1639         return -EIO;
1640     }
1641 
1642     /* determine if we should allocate a new extent of free inodes
1643      * within the ag: for directory inodes, add a new extent
1644      * if there are a small number of free inodes or number of free
1645      * inodes is a small percentage of the number of backed inodes.
1646      */
1647     if (dir)
1648         addext = (numfree < 64 ||
1649               (numfree < 256
1650                && ((numfree * 100) / numinos) <= 20));
1651     else
1652         addext = (numfree == 0);
1653 
1654     /*
1655      * try to allocate a new extent of free inodes.
1656      */
1657     if (addext) {
1658         /* if free space is not available for this new extent, try
1659          * below to allocate a free and existing (already backed)
1660          * inode from the ag.
1661          */
1662         if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
1663             return (rc);
1664     }
1665 
1666     /*
1667      * try to allocate an existing free inode from the ag.
1668      */
1669     return (diAllocIno(imap, agno, ip));
1670 }
1671 
1672 
1673 /*
1674  * NAME:    diAllocAny(imap,agno,dir,iap)
1675  *
1676  * FUNCTION:    allocate a disk inode from any other allocation group.
1677  *
1678  *      this routine is called when an allocation attempt within
1679  *      the primary allocation group has failed. if attempts to
1680  *      allocate an inode from any allocation group other than the
1681  *      specified primary group.
1682  *
1683  * PARAMETERS:
1684  *  imap    - pointer to inode map control structure.
1685  *  agno    - primary allocation group (to avoid).
1686  *  dir - 'true' if the new disk inode is for a directory.
1687  *  ip  - pointer to a new inode to be filled in on successful return
1688  *        with the disk inode number allocated, its extent address
1689  *        and the start of the ag.
1690  *
1691  * RETURN VALUES:
1692  *  0   - success.
1693  *  -ENOSPC - insufficient disk resources.
1694  *  -EIO    - i/o error.
1695  */
1696 static int
1697 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1698 {
1699     int ag, rc;
1700     int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1701 
1702 
1703     /* try to allocate from the ags following agno up to
1704      * the maximum ag number.
1705      */
1706     for (ag = agno + 1; ag <= maxag; ag++) {
1707         AG_LOCK(imap, ag);
1708 
1709         rc = diAllocAG(imap, ag, dir, ip);
1710 
1711         AG_UNLOCK(imap, ag);
1712 
1713         if (rc != -ENOSPC)
1714             return (rc);
1715     }
1716 
1717     /* try to allocate from the ags in front of agno.
1718      */
1719     for (ag = 0; ag < agno; ag++) {
1720         AG_LOCK(imap, ag);
1721 
1722         rc = diAllocAG(imap, ag, dir, ip);
1723 
1724         AG_UNLOCK(imap, ag);
1725 
1726         if (rc != -ENOSPC)
1727             return (rc);
1728     }
1729 
1730     /* no free disk inodes.
1731      */
1732     return -ENOSPC;
1733 }
1734 
1735 
1736 /*
1737  * NAME:    diAllocIno(imap,agno,ip)
1738  *
1739  * FUNCTION:    allocate a disk inode from the allocation group's free
1740  *      inode list, returning an error if this free list is
1741  *      empty (i.e. no iags on the list).
1742  *
1743  *      allocation occurs from the first iag on the list using
1744  *      the iag's free inode summary map to find the leftmost
1745  *      free inode in the iag.
1746  *
1747  * PRE CONDITION: Already have AG lock for this AG.
1748  *
1749  * PARAMETERS:
1750  *  imap    - pointer to inode map control structure.
1751  *  agno    - allocation group.
1752  *  ip  - pointer to new inode to be filled in on successful return
1753  *        with the disk inode number allocated, its extent address
1754  *        and the start of the ag.
1755  *
1756  * RETURN VALUES:
1757  *  0   - success.
1758  *  -ENOSPC - insufficient disk resources.
1759  *  -EIO    - i/o error.
1760  */
1761 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1762 {
1763     int iagno, ino, rc, rem, extno, sword;
1764     struct metapage *mp;
1765     struct iag *iagp;
1766 
1767     /* check if there are iags on the ag's free inode list.
1768      */
1769     if ((iagno = imap->im_agctl[agno].inofree) < 0)
1770         return -ENOSPC;
1771 
1772     /* obtain read lock on imap inode */
1773     IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1774 
1775     /* read the iag at the head of the list.
1776      */
1777     if ((rc = diIAGRead(imap, iagno, &mp))) {
1778         IREAD_UNLOCK(imap->im_ipimap);
1779         return (rc);
1780     }
1781     iagp = (struct iag *) mp->data;
1782 
1783     /* better be free inodes in this iag if it is on the
1784      * list.
1785      */
1786     if (!iagp->nfreeinos) {
1787         IREAD_UNLOCK(imap->im_ipimap);
1788         release_metapage(mp);
1789         jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n");
1790         return -EIO;
1791     }
1792 
1793     /* scan the free inode summary map to find an extent
1794      * with free inodes.
1795      */
1796     for (sword = 0;; sword++) {
1797         if (sword >= SMAPSZ) {
1798             IREAD_UNLOCK(imap->im_ipimap);
1799             release_metapage(mp);
1800             jfs_error(ip->i_sb,
1801                   "free inode not found in summary map\n");
1802             return -EIO;
1803         }
1804 
1805         if (~iagp->inosmap[sword])
1806             break;
1807     }
1808 
1809     /* found a extent with free inodes. determine
1810      * the extent number.
1811      */
1812     rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
1813     if (rem >= EXTSPERSUM) {
1814         IREAD_UNLOCK(imap->im_ipimap);
1815         release_metapage(mp);
1816         jfs_error(ip->i_sb, "no free extent found\n");
1817         return -EIO;
1818     }
1819     extno = (sword << L2EXTSPERSUM) + rem;
1820 
1821     /* find the first free inode in the extent.
1822      */
1823     rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
1824     if (rem >= INOSPEREXT) {
1825         IREAD_UNLOCK(imap->im_ipimap);
1826         release_metapage(mp);
1827         jfs_error(ip->i_sb, "free inode not found\n");
1828         return -EIO;
1829     }
1830 
1831     /* compute the inode number within the iag.
1832      */
1833     ino = (extno << L2INOSPEREXT) + rem;
1834 
1835     /* allocate the inode.
1836      */
1837     rc = diAllocBit(imap, iagp, ino);
1838     IREAD_UNLOCK(imap->im_ipimap);
1839     if (rc) {
1840         release_metapage(mp);
1841         return (rc);
1842     }
1843 
1844     /* set the results of the allocation and write the iag.
1845      */
1846     diInitInode(ip, iagno, ino, extno, iagp);
1847     write_metapage(mp);
1848 
1849     return (0);
1850 }
1851 
1852 
1853 /*
1854  * NAME:    diAllocExt(imap,agno,ip)
1855  *
1856  * FUNCTION:    add a new extent of free inodes to an iag, allocating
1857  *      an inode from this extent to satisfy the current allocation
1858  *      request.
1859  *
1860  *      this routine first tries to find an existing iag with free
1861  *      extents through the ag free extent list.  if list is not
1862  *      empty, the head of the list will be selected as the home
1863  *      of the new extent of free inodes.  otherwise (the list is
1864  *      empty), a new iag will be allocated for the ag to contain
1865  *      the extent.
1866  *
1867  *      once an iag has been selected, the free extent summary map
1868  *      is used to locate a free extent within the iag and diNewExt()
1869  *      is called to initialize the extent, with initialization
1870  *      including the allocation of the first inode of the extent
1871  *      for the purpose of satisfying this request.
1872  *
1873  * PARAMETERS:
1874  *  imap    - pointer to inode map control structure.
1875  *  agno    - allocation group number.
1876  *  ip  - pointer to new inode to be filled in on successful return
1877  *        with the disk inode number allocated, its extent address
1878  *        and the start of the ag.
1879  *
1880  * RETURN VALUES:
1881  *  0   - success.
1882  *  -ENOSPC - insufficient disk resources.
1883  *  -EIO    - i/o error.
1884  */
1885 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1886 {
1887     int rem, iagno, sword, extno, rc;
1888     struct metapage *mp;
1889     struct iag *iagp;
1890 
1891     /* check if the ag has any iags with free extents.  if not,
1892      * allocate a new iag for the ag.
1893      */
1894     if ((iagno = imap->im_agctl[agno].extfree) < 0) {
1895         /* If successful, diNewIAG will obtain the read lock on the
1896          * imap inode.
1897          */
1898         if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
1899             return (rc);
1900         }
1901         iagp = (struct iag *) mp->data;
1902 
1903         /* set the ag number if this a brand new iag
1904          */
1905         iagp->agstart =
1906             cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
1907     } else {
1908         /* read the iag.
1909          */
1910         IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1911         if ((rc = diIAGRead(imap, iagno, &mp))) {
1912             IREAD_UNLOCK(imap->im_ipimap);
1913             jfs_error(ip->i_sb, "error reading iag\n");
1914             return rc;
1915         }
1916         iagp = (struct iag *) mp->data;
1917     }
1918 
1919     /* using the free extent summary map, find a free extent.
1920      */
1921     for (sword = 0;; sword++) {
1922         if (sword >= SMAPSZ) {
1923             release_metapage(mp);
1924             IREAD_UNLOCK(imap->im_ipimap);
1925             jfs_error(ip->i_sb, "free ext summary map not found\n");
1926             return -EIO;
1927         }
1928         if (~iagp->extsmap[sword])
1929             break;
1930     }
1931 
1932     /* determine the extent number of the free extent.
1933      */
1934     rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
1935     if (rem >= EXTSPERSUM) {
1936         release_metapage(mp);
1937         IREAD_UNLOCK(imap->im_ipimap);
1938         jfs_error(ip->i_sb, "free extent not found\n");
1939         return -EIO;
1940     }
1941     extno = (sword << L2EXTSPERSUM) + rem;
1942 
1943     /* initialize the new extent.
1944      */
1945     rc = diNewExt(imap, iagp, extno);
1946     IREAD_UNLOCK(imap->im_ipimap);
1947     if (rc) {
1948         /* something bad happened.  if a new iag was allocated,
1949          * place it back on the inode map's iag free list, and
1950          * clear the ag number information.
1951          */
1952         if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
1953             IAGFREE_LOCK(imap);
1954             iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1955             imap->im_freeiag = iagno;
1956             IAGFREE_UNLOCK(imap);
1957         }
1958         write_metapage(mp);
1959         return (rc);
1960     }
1961 
1962     /* set the results of the allocation and write the iag.
1963      */
1964     diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
1965 
1966     write_metapage(mp);
1967 
1968     return (0);
1969 }
1970 
1971 
1972 /*
1973  * NAME:    diAllocBit(imap,iagp,ino)
1974  *
1975  * FUNCTION:    allocate a backed inode from an iag.
1976  *
1977  *      this routine performs the mechanics of allocating a
1978  *      specified inode from a backed extent.
1979  *
1980  *      if the inode to be allocated represents the last free
1981  *      inode within the iag, the iag will be removed from the
1982  *      ag free inode list.
1983  *
1984  *      a careful update approach is used to provide consistency
1985  *      in the face of updates to multiple buffers.  under this
1986  *      approach, all required buffers are obtained before making
1987  *      any updates and are held all are updates are complete.
1988  *
1989  * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
1990  *  this AG.  Must have read lock on imap inode.
1991  *
1992  * PARAMETERS:
1993  *  imap    - pointer to inode map control structure.
1994  *  iagp    - pointer to iag.
1995  *  ino - inode number to be allocated within the iag.
1996  *
1997  * RETURN VALUES:
1998  *  0   - success.
1999  *  -ENOSPC - insufficient disk resources.
2000  *  -EIO    - i/o error.
2001  */
2002 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2003 {
2004     int extno, bitno, agno, sword, rc;
2005     struct metapage *amp = NULL, *bmp = NULL;
2006     struct iag *aiagp = NULL, *biagp = NULL;
2007     u32 mask;
2008 
2009     /* check if this is the last free inode within the iag.
2010      * if so, it will have to be removed from the ag free
2011      * inode list, so get the iags preceding and following
2012      * it on the list.
2013      */
2014     if (iagp->nfreeinos == cpu_to_le32(1)) {
2015         if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
2016             if ((rc =
2017                  diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
2018                        &amp)))
2019                 return (rc);
2020             aiagp = (struct iag *) amp->data;
2021         }
2022 
2023         if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
2024             if ((rc =
2025                  diIAGRead(imap,
2026                        le32_to_cpu(iagp->inofreeback),
2027                        &bmp))) {
2028                 if (amp)
2029                     release_metapage(amp);
2030                 return (rc);
2031             }
2032             biagp = (struct iag *) bmp->data;
2033         }
2034     }
2035 
2036     /* get the ag number, extent number, inode number within
2037      * the extent.
2038      */
2039     agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
2040     extno = ino >> L2INOSPEREXT;
2041     bitno = ino & (INOSPEREXT - 1);
2042 
2043     /* compute the mask for setting the map.
2044      */
2045     mask = HIGHORDER >> bitno;
2046 
2047     /* the inode should be free and backed.
2048      */
2049     if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
2050         ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
2051         (addressPXD(&iagp->inoext[extno]) == 0)) {
2052         if (amp)
2053             release_metapage(amp);
2054         if (bmp)
2055             release_metapage(bmp);
2056 
2057         jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n");
2058         return -EIO;
2059     }
2060 
2061     /* mark the inode as allocated in the working map.
2062      */
2063     iagp->wmap[extno] |= cpu_to_le32(mask);
2064 
2065     /* check if all inodes within the extent are now
2066      * allocated.  if so, update the free inode summary
2067      * map to reflect this.
2068      */
2069     if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
2070         sword = extno >> L2EXTSPERSUM;
2071         bitno = extno & (EXTSPERSUM - 1);
2072         iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
2073     }
2074 
2075     /* if this was the last free inode in the iag, remove the
2076      * iag from the ag free inode list.
2077      */
2078     if (iagp->nfreeinos == cpu_to_le32(1)) {
2079         if (amp) {
2080             aiagp->inofreeback = iagp->inofreeback;
2081             write_metapage(amp);
2082         }
2083 
2084         if (bmp) {
2085             biagp->inofreefwd = iagp->inofreefwd;
2086             write_metapage(bmp);
2087         } else {
2088             imap->im_agctl[agno].inofree =
2089                 le32_to_cpu(iagp->inofreefwd);
2090         }
2091         iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2092     }
2093 
2094     /* update the free inode count at the iag, ag, inode
2095      * map levels.
2096      */
2097     le32_add_cpu(&iagp->nfreeinos, -1);
2098     imap->im_agctl[agno].numfree -= 1;
2099     atomic_dec(&imap->im_numfree);
2100 
2101     return (0);
2102 }
2103 
2104 
2105 /*
2106  * NAME:    diNewExt(imap,iagp,extno)
2107  *
2108  * FUNCTION:    initialize a new extent of inodes for an iag, allocating
2109  *      the first inode of the extent for use for the current
2110  *      allocation request.
2111  *
2112  *      disk resources are allocated for the new extent of inodes
2113  *      and the inodes themselves are initialized to reflect their
2114  *      existence within the extent (i.e. their inode numbers and
2115  *      inode extent addresses are set) and their initial state
2116  *      (mode and link count are set to zero).
2117  *
2118  *      if the iag is new, it is not yet on an ag extent free list
2119  *      but will now be placed on this list.
2120  *
2121  *      if the allocation of the new extent causes the iag to
2122  *      have no free extent, the iag will be removed from the
2123  *      ag extent free list.
2124  *
2125  *      if the iag has no free backed inodes, it will be placed
2126  *      on the ag free inode list, since the addition of the new
2127  *      extent will now cause it to have free inodes.
2128  *
2129  *      a careful update approach is used to provide consistency
2130  *      (i.e. list consistency) in the face of updates to multiple
2131  *      buffers.  under this approach, all required buffers are
2132  *      obtained before making any updates and are held until all
2133  *      updates are complete.
2134  *
2135  * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
2136  *  this AG.  Must have read lock on imap inode.
2137  *
2138  * PARAMETERS:
2139  *  imap    - pointer to inode map control structure.
2140  *  iagp    - pointer to iag.
2141  *  extno   - extent number.
2142  *
2143  * RETURN VALUES:
2144  *  0   - success.
2145  *  -ENOSPC - insufficient disk resources.
2146  *  -EIO    - i/o error.
2147  */
2148 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2149 {
2150     int agno, iagno, fwd, back, freei = 0, sword, rc;
2151     struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
2152     struct metapage *amp, *bmp, *cmp, *dmp;
2153     struct inode *ipimap;
2154     s64 blkno, hint;
2155     int i, j;
2156     u32 mask;
2157     ino_t ino;
2158     struct dinode *dp;
2159     struct jfs_sb_info *sbi;
2160 
2161     /* better have free extents.
2162      */
2163     if (!iagp->nfreeexts) {
2164         jfs_error(imap->im_ipimap->i_sb, "no free extents\n");
2165         return -EIO;
2166     }
2167 
2168     /* get the inode map inode.
2169      */
2170     ipimap = imap->im_ipimap;
2171     sbi = JFS_SBI(ipimap->i_sb);
2172 
2173     amp = bmp = cmp = NULL;
2174 
2175     /* get the ag and iag numbers for this iag.
2176      */
2177     agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
2178     iagno = le32_to_cpu(iagp->iagnum);
2179 
2180     /* check if this is the last free extent within the
2181      * iag.  if so, the iag must be removed from the ag
2182      * free extent list, so get the iags preceding and
2183      * following the iag on this list.
2184      */
2185     if (iagp->nfreeexts == cpu_to_le32(1)) {
2186         if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
2187             if ((rc = diIAGRead(imap, fwd, &amp)))
2188                 return (rc);
2189             aiagp = (struct iag *) amp->data;
2190         }
2191 
2192         if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
2193             if ((rc = diIAGRead(imap, back, &bmp)))
2194                 goto error_out;
2195             biagp = (struct iag *) bmp->data;
2196         }
2197     } else {
2198         /* the iag has free extents.  if all extents are free
2199          * (as is the case for a newly allocated iag), the iag
2200          * must be added to the ag free extent list, so get
2201          * the iag at the head of the list in preparation for
2202          * adding this iag to this list.
2203          */
2204         fwd = back = -1;
2205         if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2206             if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
2207                 if ((rc = diIAGRead(imap, fwd, &amp)))
2208                     goto error_out;
2209                 aiagp = (struct iag *) amp->data;
2210             }
2211         }
2212     }
2213 
2214     /* check if the iag has no free inodes.  if so, the iag
2215      * will have to be added to the ag free inode list, so get
2216      * the iag at the head of the list in preparation for
2217      * adding this iag to this list.  in doing this, we must
2218      * check if we already have the iag at the head of
2219      * the list in hand.
2220      */
2221     if (iagp->nfreeinos == 0) {
2222         freei = imap->im_agctl[agno].inofree;
2223 
2224         if (freei >= 0) {
2225             if (freei == fwd) {
2226                 ciagp = aiagp;
2227             } else if (freei == back) {
2228                 ciagp = biagp;
2229             } else {
2230                 if ((rc = diIAGRead(imap, freei, &cmp)))
2231                     goto error_out;
2232                 ciagp = (struct iag *) cmp->data;
2233             }
2234             if (ciagp == NULL) {
2235                 jfs_error(imap->im_ipimap->i_sb,
2236                       "ciagp == NULL\n");
2237                 rc = -EIO;
2238                 goto error_out;
2239             }
2240         }
2241     }
2242 
2243     /* allocate disk space for the inode extent.
2244      */
2245     if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
2246         hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
2247     else
2248         hint = addressPXD(&iagp->inoext[extno - 1]) +
2249             lengthPXD(&iagp->inoext[extno - 1]) - 1;
2250 
2251     if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
2252         goto error_out;
2253 
2254     /* compute the inode number of the first inode within the
2255      * extent.
2256      */
2257     ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
2258 
2259     /* initialize the inodes within the newly allocated extent a
2260      * page at a time.
2261      */
2262     for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
2263         /* get a buffer for this page of disk inodes.
2264          */
2265         dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
2266         if (dmp == NULL) {
2267             rc = -EIO;
2268             goto error_out;
2269         }
2270         dp = (struct dinode *) dmp->data;
2271 
2272         /* initialize the inode number, mode, link count and
2273          * inode extent address.
2274          */
2275         for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
2276             dp->di_inostamp = cpu_to_le32(sbi->inostamp);
2277             dp->di_number = cpu_to_le32(ino);
2278             dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
2279             dp->di_mode = 0;
2280             dp->di_nlink = 0;
2281             PXDaddress(&(dp->di_ixpxd), blkno);
2282             PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
2283         }
2284         write_metapage(dmp);
2285     }
2286 
2287     /* if this is the last free extent within the iag, remove the
2288      * iag from the ag free extent list.
2289      */
2290     if (iagp->nfreeexts == cpu_to_le32(1)) {
2291         if (fwd >= 0)
2292             aiagp->extfreeback = iagp->extfreeback;
2293 
2294         if (back >= 0)
2295             biagp->extfreefwd = iagp->extfreefwd;
2296         else
2297             imap->im_agctl[agno].extfree =
2298                 le32_to_cpu(iagp->extfreefwd);
2299 
2300         iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2301     } else {
2302         /* if the iag has all free extents (newly allocated iag),
2303          * add the iag to the ag free extent list.
2304          */
2305         if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2306             if (fwd >= 0)
2307                 aiagp->extfreeback = cpu_to_le32(iagno);
2308 
2309             iagp->extfreefwd = cpu_to_le32(fwd);
2310             iagp->extfreeback = cpu_to_le32(-1);
2311             imap->im_agctl[agno].extfree = iagno;
2312         }
2313     }
2314 
2315     /* if the iag has no free inodes, add the iag to the
2316      * ag free inode list.
2317      */
2318     if (iagp->nfreeinos == 0) {
2319         if (freei >= 0)
2320             ciagp->inofreeback = cpu_to_le32(iagno);
2321 
2322         iagp->inofreefwd =
2323             cpu_to_le32(imap->im_agctl[agno].inofree);
2324         iagp->inofreeback = cpu_to_le32(-1);
2325         imap->im_agctl[agno].inofree = iagno;
2326     }
2327 
2328     /* initialize the extent descriptor of the extent. */
2329     PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
2330     PXDaddress(&iagp->inoext[extno], blkno);
2331 
2332     /* initialize the working and persistent map of the extent.
2333      * the working map will be initialized such that
2334      * it indicates the first inode of the extent is allocated.
2335      */
2336     iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
2337     iagp->pmap[extno] = 0;
2338 
2339     /* update the free inode and free extent summary maps
2340      * for the extent to indicate the extent has free inodes
2341      * and no longer represents a free extent.
2342      */
2343     sword = extno >> L2EXTSPERSUM;
2344     mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
2345     iagp->extsmap[sword] |= cpu_to_le32(mask);
2346     iagp->inosmap[sword] &= cpu_to_le32(~mask);
2347 
2348     /* update the free inode and free extent counts for the
2349      * iag.
2350      */
2351     le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2352     le32_add_cpu(&iagp->nfreeexts, -1);
2353 
2354     /* update the free and backed inode counts for the ag.
2355      */
2356     imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
2357     imap->im_agctl[agno].numinos += INOSPEREXT;
2358 
2359     /* update the free and backed inode counts for the inode map.
2360      */
2361     atomic_add(INOSPEREXT - 1, &imap->im_numfree);
2362     atomic_add(INOSPEREXT, &imap->im_numinos);
2363 
2364     /* write the iags.
2365      */
2366     if (amp)
2367         write_metapage(amp);
2368     if (bmp)
2369         write_metapage(bmp);
2370     if (cmp)
2371         write_metapage(cmp);
2372 
2373     return (0);
2374 
2375       error_out:
2376 
2377     /* release the iags.
2378      */
2379     if (amp)
2380         release_metapage(amp);
2381     if (bmp)
2382         release_metapage(bmp);
2383     if (cmp)
2384         release_metapage(cmp);
2385 
2386     return (rc);
2387 }
2388 
2389 
2390 /*
2391  * NAME:    diNewIAG(imap,iagnop,agno)
2392  *
2393  * FUNCTION:    allocate a new iag for an allocation group.
2394  *
2395  *      first tries to allocate the iag from the inode map
2396  *      iagfree list:
2397  *      if the list has free iags, the head of the list is removed
2398  *      and returned to satisfy the request.
2399  *      if the inode map's iag free list is empty, the inode map
2400  *      is extended to hold a new iag. this new iag is initialized
2401  *      and returned to satisfy the request.
2402  *
2403  * PARAMETERS:
2404  *  imap    - pointer to inode map control structure.
2405  *  iagnop  - pointer to an iag number set with the number of the
2406  *        newly allocated iag upon successful return.
2407  *  agno    - allocation group number.
2408  *  bpp - Buffer pointer to be filled in with new IAG's buffer
2409  *
2410  * RETURN VALUES:
2411  *  0   - success.
2412  *  -ENOSPC - insufficient disk resources.
2413  *  -EIO    - i/o error.
2414  *
2415  * serialization:
2416  *  AG lock held on entry/exit;
2417  *  write lock on the map is held inside;
2418  *  read lock on the map is held on successful completion;
2419  *
2420  * note: new iag transaction:
2421  * . synchronously write iag;
2422  * . write log of xtree and inode of imap;
2423  * . commit;
2424  * . synchronous write of xtree (right to left, bottom to top);
2425  * . at start of logredo(): init in-memory imap with one additional iag page;
2426  * . at end of logredo(): re-read imap inode to determine
2427  *   new imap size;
2428  */
2429 static int
2430 diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2431 {
2432     int rc;
2433     int iagno, i, xlen;
2434     struct inode *ipimap;
2435     struct super_block *sb;
2436     struct jfs_sb_info *sbi;
2437     struct metapage *mp;
2438     struct iag *iagp;
2439     s64 xaddr = 0;
2440     s64 blkno;
2441     tid_t tid;
2442     struct inode *iplist[1];
2443 
2444     /* pick up pointers to the inode map and mount inodes */
2445     ipimap = imap->im_ipimap;
2446     sb = ipimap->i_sb;
2447     sbi = JFS_SBI(sb);
2448 
2449     /* acquire the free iag lock */
2450     IAGFREE_LOCK(imap);
2451 
2452     /* if there are any iags on the inode map free iag list,
2453      * allocate the iag from the head of the list.
2454      */
2455     if (imap->im_freeiag >= 0) {
2456         /* pick up the iag number at the head of the list */
2457         iagno = imap->im_freeiag;
2458 
2459         /* determine the logical block number of the iag */
2460         blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2461     } else {
2462         /* no free iags. the inode map will have to be extented
2463          * to include a new iag.
2464          */
2465 
2466         /* acquire inode map lock */
2467         IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);
2468 
2469         if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
2470             IWRITE_UNLOCK(ipimap);
2471             IAGFREE_UNLOCK(imap);
2472             jfs_error(imap->im_ipimap->i_sb,
2473                   "ipimap->i_size is wrong\n");
2474             return -EIO;
2475         }
2476 
2477 
2478         /* get the next available iag number */
2479         iagno = imap->im_nextiag;
2480 
2481         /* make sure that we have not exceeded the maximum inode
2482          * number limit.
2483          */
2484         if (iagno > (MAXIAGS - 1)) {
2485             /* release the inode map lock */
2486             IWRITE_UNLOCK(ipimap);
2487 
2488             rc = -ENOSPC;
2489             goto out;
2490         }
2491 
2492         /*
2493          * synchronously append new iag page.
2494          */
2495         /* determine the logical address of iag page to append */
2496         blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2497 
2498         /* Allocate extent for new iag page */
2499         xlen = sbi->nbperpage;
2500         if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
2501             /* release the inode map lock */
2502             IWRITE_UNLOCK(ipimap);
2503 
2504             goto out;
2505         }
2506 
2507         /*
2508          * start transaction of update of the inode map
2509          * addressing structure pointing to the new iag page;
2510          */
2511         tid = txBegin(sb, COMMIT_FORCE);
2512         mutex_lock(&JFS_IP(ipimap)->commit_mutex);
2513 
2514         /* update the inode map addressing structure to point to it */
2515         if ((rc =
2516              xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
2517             txEnd(tid);
2518             mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2519             /* Free the blocks allocated for the iag since it was
2520              * not successfully added to the inode map
2521              */
2522             dbFree(ipimap, xaddr, (s64) xlen);
2523 
2524             /* release the inode map lock */
2525             IWRITE_UNLOCK(ipimap);
2526 
2527             goto out;
2528         }
2529 
2530         /* update the inode map's inode to reflect the extension */
2531         ipimap->i_size += PSIZE;
2532         inode_add_bytes(ipimap, PSIZE);
2533 
2534         /* assign a buffer for the page */
2535         mp = get_metapage(ipimap, blkno, PSIZE, 0);
2536         if (!mp) {
2537             /*
2538              * This is very unlikely since we just created the
2539              * extent, but let's try to handle it correctly
2540              */
2541             xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
2542                    COMMIT_PWMAP);
2543 
2544             txAbort(tid, 0);
2545             txEnd(tid);
2546             mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2547 
2548             /* release the inode map lock */
2549             IWRITE_UNLOCK(ipimap);
2550 
2551             rc = -EIO;
2552             goto out;
2553         }
2554         iagp = (struct iag *) mp->data;
2555 
2556         /* init the iag */
2557         memset(iagp, 0, sizeof(struct iag));
2558         iagp->iagnum = cpu_to_le32(iagno);
2559         iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2560         iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2561         iagp->iagfree = cpu_to_le32(-1);
2562         iagp->nfreeinos = 0;
2563         iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);
2564 
2565         /* initialize the free inode summary map (free extent
2566          * summary map initialization handled by bzero).
2567          */
2568         for (i = 0; i < SMAPSZ; i++)
2569             iagp->inosmap[i] = cpu_to_le32(ONES);
2570 
2571         /*
2572          * Write and sync the metapage
2573          */
2574         flush_metapage(mp);
2575 
2576         /*
2577          * txCommit(COMMIT_FORCE) will synchronously write address
2578          * index pages and inode after commit in careful update order
2579          * of address index pages (right to left, bottom up);
2580          */
2581         iplist[0] = ipimap;
2582         rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
2583 
2584         txEnd(tid);
2585         mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2586 
2587         duplicateIXtree(sb, blkno, xlen, &xaddr);
2588 
2589         /* update the next available iag number */
2590         imap->im_nextiag += 1;
2591 
2592         /* Add the iag to the iag free list so we don't lose the iag
2593          * if a failure happens now.
2594          */
2595         imap->im_freeiag = iagno;
2596 
2597         /* Until we have logredo working, we want the imap inode &
2598          * control page to be up to date.
2599          */
2600         diSync(ipimap);
2601 
2602         /* release the inode map lock */
2603         IWRITE_UNLOCK(ipimap);
2604     }
2605 
2606     /* obtain read lock on map */
2607     IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2608 
2609     /* read the iag */
2610     if ((rc = diIAGRead(imap, iagno, &mp))) {
2611         IREAD_UNLOCK(ipimap);
2612         rc = -EIO;
2613         goto out;
2614     }
2615     iagp = (struct iag *) mp->data;
2616 
2617     /* remove the iag from the iag free list */
2618     imap->im_freeiag = le32_to_cpu(iagp->iagfree);
2619     iagp->iagfree = cpu_to_le32(-1);
2620 
2621     /* set the return iag number and buffer pointer */
2622     *iagnop = iagno;
2623     *mpp = mp;
2624 
2625       out:
2626     /* release the iag free lock */
2627     IAGFREE_UNLOCK(imap);
2628 
2629     return (rc);
2630 }
2631 
2632 /*
2633  * NAME:    diIAGRead()
2634  *
2635  * FUNCTION:    get the buffer for the specified iag within a fileset
2636  *      or aggregate inode map.
2637  *
2638  * PARAMETERS:
2639  *  imap    - pointer to inode map control structure.
2640  *  iagno   - iag number.
2641  *  bpp - point to buffer pointer to be filled in on successful
2642  *        exit.
2643  *
2644  * SERIALIZATION:
2645  *  must have read lock on imap inode
2646  *  (When called by diExtendFS, the filesystem is quiesced, therefore
2647  *   the read lock is unnecessary.)
2648  *
2649  * RETURN VALUES:
2650  *  0   - success.
2651  *  -EIO    - i/o error.
2652  */
2653 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2654 {
2655     struct inode *ipimap = imap->im_ipimap;
2656     s64 blkno;
2657 
2658     /* compute the logical block number of the iag. */
2659     blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
2660 
2661     /* read the iag. */
2662     *mpp = read_metapage(ipimap, blkno, PSIZE, 0);
2663     if (*mpp == NULL) {
2664         return -EIO;
2665     }
2666 
2667     return (0);
2668 }
2669 
2670 /*
2671  * NAME:    diFindFree()
2672  *
2673  * FUNCTION:    find the first free bit in a word starting at
2674  *      the specified bit position.
2675  *
2676  * PARAMETERS:
2677  *  word    - word to be examined.
2678  *  start   - starting bit position.
2679  *
2680  * RETURN VALUES:
2681  *  bit position of first free bit in the word or 32 if
2682  *  no free bits were found.
2683  */
2684 static int diFindFree(u32 word, int start)
2685 {
2686     int bitno;
2687     assert(start < 32);
2688     /* scan the word for the first free bit. */
2689     for (word <<= start, bitno = start; bitno < 32;
2690          bitno++, word <<= 1) {
2691         if ((word & HIGHORDER) == 0)
2692             break;
2693     }
2694     return (bitno);
2695 }
2696 
2697 /*
2698  * NAME:    diUpdatePMap()
2699  *
2700  * FUNCTION: Update the persistent map in an IAG for the allocation or
2701  *  freeing of the specified inode.
2702  *
2703  * PRE CONDITIONS: Working map has already been updated for allocate.
2704  *
2705  * PARAMETERS:
2706  *  ipimap  - Incore inode map inode
2707  *  inum    - Number of inode to mark in permanent map
2708  *  is_free - If 'true' indicates inode should be marked freed, otherwise
2709  *        indicates inode should be marked allocated.
2710  *
2711  * RETURN VALUES:
2712  *      0 for success
2713  */
2714 int
2715 diUpdatePMap(struct inode *ipimap,
2716          unsigned long inum, bool is_free, struct tblock * tblk)
2717 {
2718     int rc;
2719     struct iag *iagp;
2720     struct metapage *mp;
2721     int iagno, ino, extno, bitno;
2722     struct inomap *imap;
2723     u32 mask;
2724     struct jfs_log *log;
2725     int lsn, difft, diffp;
2726     unsigned long flags;
2727 
2728     imap = JFS_IP(ipimap)->i_imap;
2729     /* get the iag number containing the inode */
2730     iagno = INOTOIAG(inum);
2731     /* make sure that the iag is contained within the map */
2732     if (iagno >= imap->im_nextiag) {
2733         jfs_error(ipimap->i_sb, "the iag is outside the map\n");
2734         return -EIO;
2735     }
2736     /* read the iag */
2737     IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2738     rc = diIAGRead(imap, iagno, &mp);
2739     IREAD_UNLOCK(ipimap);
2740     if (rc)
2741         return (rc);
2742     metapage_wait_for_io(mp);
2743     iagp = (struct iag *) mp->data;
2744     /* get the inode number and extent number of the inode within
2745      * the iag and the inode number within the extent.
2746      */
2747     ino = inum & (INOSPERIAG - 1);
2748     extno = ino >> L2INOSPEREXT;
2749     bitno = ino & (INOSPEREXT - 1);
2750     mask = HIGHORDER >> bitno;
2751     /*
2752      * mark the inode free in persistent map:
2753      */
2754     if (is_free) {
2755         /* The inode should have been allocated both in working
2756          * map and in persistent map;
2757          * the inode will be freed from working map at the release
2758          * of last reference release;
2759          */
2760         if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2761             jfs_error(ipimap->i_sb,
2762                   "inode %ld not marked as allocated in wmap!\n",
2763                   inum);
2764         }
2765         if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
2766             jfs_error(ipimap->i_sb,
2767                   "inode %ld not marked as allocated in pmap!\n",
2768                   inum);
2769         }
2770         /* update the bitmap for the extent of the freed inode */
2771         iagp->pmap[extno] &= cpu_to_le32(~mask);
2772     }
2773     /*
2774      * mark the inode allocated in persistent map:
2775      */
2776     else {
2777         /* The inode should be already allocated in the working map
2778          * and should be free in persistent map;
2779          */
2780         if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2781             release_metapage(mp);
2782             jfs_error(ipimap->i_sb,
2783                   "the inode is not allocated in the working map\n");
2784             return -EIO;
2785         }
2786         if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
2787             release_metapage(mp);
2788             jfs_error(ipimap->i_sb,
2789                   "the inode is not free in the persistent map\n");
2790             return -EIO;
2791         }
2792         /* update the bitmap for the extent of the allocated inode */
2793         iagp->pmap[extno] |= cpu_to_le32(mask);
2794     }
2795     /*
2796      * update iag lsn
2797      */
2798     lsn = tblk->lsn;
2799     log = JFS_SBI(tblk->sb)->log;
2800     LOGSYNC_LOCK(log, flags);
2801     if (mp->lsn != 0) {
2802         /* inherit older/smaller lsn */
2803         logdiff(difft, lsn, log);
2804         logdiff(diffp, mp->lsn, log);
2805         if (difft < diffp) {
2806             mp->lsn = lsn;
2807             /* move mp after tblock in logsync list */
2808             list_move(&mp->synclist, &tblk->synclist);
2809         }
2810         /* inherit younger/larger clsn */
2811         assert(mp->clsn);
2812         logdiff(difft, tblk->clsn, log);
2813         logdiff(diffp, mp->clsn, log);
2814         if (difft > diffp)
2815             mp->clsn = tblk->clsn;
2816     } else {
2817         mp->log = log;
2818         mp->lsn = lsn;
2819         /* insert mp after tblock in logsync list */
2820         log->count++;
2821         list_add(&mp->synclist, &tblk->synclist);
2822         mp->clsn = tblk->clsn;
2823     }
2824     LOGSYNC_UNLOCK(log, flags);
2825     write_metapage(mp);
2826     return (0);
2827 }
2828 
2829 /*
2830  *  diExtendFS()
2831  *
2832  * function: update imap for extendfs();
2833  *
2834  * note: AG size has been increased s.t. each k old contiguous AGs are
2835  * coalesced into a new AG;
2836  */
2837 int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2838 {
2839     int rc, rcx = 0;
2840     struct inomap *imap = JFS_IP(ipimap)->i_imap;
2841     struct iag *iagp = NULL, *hiagp = NULL;
2842     struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
2843     struct metapage *bp, *hbp;
2844     int i, n, head;
2845     int numinos, xnuminos = 0, xnumfree = 0;
2846     s64 agstart;
2847 
2848     jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
2849            imap->im_nextiag, atomic_read(&imap->im_numinos),
2850            atomic_read(&imap->im_numfree));
2851 
2852     /*
2853      *  reconstruct imap
2854      *
2855      * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2856      * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
2857      * note: new AG size = old AG size * (2**x).
2858      */
2859 
2860     /* init per AG control information im_agctl[] */
2861     for (i = 0; i < MAXAG; i++) {
2862         imap->im_agctl[i].inofree = -1;
2863         imap->im_agctl[i].extfree = -1;
2864         imap->im_agctl[i].numinos = 0;  /* number of backed inodes */
2865         imap->im_agctl[i].numfree = 0;  /* number of free backed inodes */
2866     }
2867 
2868     /*
2869      *  process each iag page of the map.
2870      *
2871      * rebuild AG Free Inode List, AG Free Inode Extent List;
2872      */
2873     for (i = 0; i < imap->im_nextiag; i++) {
2874         if ((rc = diIAGRead(imap, i, &bp))) {
2875             rcx = rc;
2876             continue;
2877         }
2878         iagp = (struct iag *) bp->data;
2879         if (le32_to_cpu(iagp->iagnum) != i) {
2880             release_metapage(bp);
2881             jfs_error(ipimap->i_sb, "unexpected value of iagnum\n");
2882             return -EIO;
2883         }
2884 
2885         /* leave free iag in the free iag list */
2886         if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2887             release_metapage(bp);
2888             continue;
2889         }
2890 
2891         agstart = le64_to_cpu(iagp->agstart);
2892         n = agstart >> mp->db_agl2size;
2893         iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
2894 
2895         /* compute backed inodes */
2896         numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
2897             << L2INOSPEREXT;
2898         if (numinos > 0) {
2899             /* merge AG backed inodes */
2900             imap->im_agctl[n].numinos += numinos;
2901             xnuminos += numinos;
2902         }
2903 
2904         /* if any backed free inodes, insert at AG free inode list */
2905         if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
2906             if ((head = imap->im_agctl[n].inofree) == -1) {
2907                 iagp->inofreefwd = cpu_to_le32(-1);
2908                 iagp->inofreeback = cpu_to_le32(-1);
2909             } else {
2910                 if ((rc = diIAGRead(imap, head, &hbp))) {
2911                     rcx = rc;
2912                     goto nextiag;
2913                 }
2914                 hiagp = (struct iag *) hbp->data;
2915                 hiagp->inofreeback = iagp->iagnum;
2916                 iagp->inofreefwd = cpu_to_le32(head);
2917                 iagp->inofreeback = cpu_to_le32(-1);
2918                 write_metapage(hbp);
2919             }
2920 
2921             imap->im_agctl[n].inofree =
2922                 le32_to_cpu(iagp->iagnum);
2923 
2924             /* merge AG backed free inodes */
2925             imap->im_agctl[n].numfree +=
2926                 le32_to_cpu(iagp->nfreeinos);
2927             xnumfree += le32_to_cpu(iagp->nfreeinos);
2928         }
2929 
2930         /* if any free extents, insert at AG free extent list */
2931         if (le32_to_cpu(iagp->nfreeexts) > 0) {
2932             if ((head = imap->im_agctl[n].extfree) == -1) {
2933                 iagp->extfreefwd = cpu_to_le32(-1);
2934                 iagp->extfreeback = cpu_to_le32(-1);
2935             } else {
2936                 if ((rc = diIAGRead(imap, head, &hbp))) {
2937                     rcx = rc;
2938                     goto nextiag;
2939                 }
2940                 hiagp = (struct iag *) hbp->data;
2941                 hiagp->extfreeback = iagp->iagnum;
2942                 iagp->extfreefwd = cpu_to_le32(head);
2943                 iagp->extfreeback = cpu_to_le32(-1);
2944                 write_metapage(hbp);
2945             }
2946 
2947             imap->im_agctl[n].extfree =
2948                 le32_to_cpu(iagp->iagnum);
2949         }
2950 
2951           nextiag:
2952         write_metapage(bp);
2953     }
2954 
2955     if (xnuminos != atomic_read(&imap->im_numinos) ||
2956         xnumfree != atomic_read(&imap->im_numfree)) {
2957         jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n");
2958         return -EIO;
2959     }
2960 
2961     return rcx;
2962 }
2963 
2964 
2965 /*
2966  *  duplicateIXtree()
2967  *
2968  * serialization: IWRITE_LOCK held on entry/exit
2969  *
2970  * note: shadow page with regular inode (rel.2);
2971  */
2972 static void duplicateIXtree(struct super_block *sb, s64 blkno,
2973                 int xlen, s64 *xaddr)
2974 {
2975     struct jfs_superblock *j_sb;
2976     struct buffer_head *bh;
2977     struct inode *ip;
2978     tid_t tid;
2979 
2980     /* if AIT2 ipmap2 is bad, do not try to update it */
2981     if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)    /* s_flag */
2982         return;
2983     ip = diReadSpecial(sb, FILESYSTEM_I, 1);
2984     if (ip == NULL) {
2985         JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
2986         if (readSuper(sb, &bh))
2987             return;
2988         j_sb = (struct jfs_superblock *)bh->b_data;
2989         j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
2990 
2991         mark_buffer_dirty(bh);
2992         sync_dirty_buffer(bh);
2993         brelse(bh);
2994         return;
2995     }
2996 
2997     /* start transaction */
2998     tid = txBegin(sb, COMMIT_FORCE);
2999     /* update the inode map addressing structure to point to it */
3000     if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
3001         JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3002         txAbort(tid, 1);
3003         goto cleanup;
3004 
3005     }
3006     /* update the inode map's inode to reflect the extension */
3007     ip->i_size += PSIZE;
3008     inode_add_bytes(ip, PSIZE);
3009     txCommit(tid, 1, &ip, COMMIT_FORCE);
3010       cleanup:
3011     txEnd(tid);
3012     diFreeSpecial(ip);
3013 }
3014 
3015 /*
3016  * NAME:    copy_from_dinode()
3017  *
3018  * FUNCTION:    Copies inode info from disk inode to in-memory inode
3019  *
3020  * RETURN VALUES:
3021  *  0   - success
3022  *  -ENOMEM - insufficient memory
3023  */
3024 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3025 {
3026     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3027     struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3028 
3029     jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3030     jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3031     jfs_set_inode_flags(ip);
3032 
3033     ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3034     if (sbi->umask != -1) {
3035         ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
3036         /* For directories, add x permission if r is allowed by umask */
3037         if (S_ISDIR(ip->i_mode)) {
3038             if (ip->i_mode & 0400)
3039                 ip->i_mode |= 0100;
3040             if (ip->i_mode & 0040)
3041                 ip->i_mode |= 0010;
3042             if (ip->i_mode & 0004)
3043                 ip->i_mode |= 0001;
3044         }
3045     }
3046     set_nlink(ip, le32_to_cpu(dip->di_nlink));
3047 
3048     jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3049     if (!uid_valid(sbi->uid))
3050         ip->i_uid = jfs_ip->saved_uid;
3051     else {
3052         ip->i_uid = sbi->uid;
3053     }
3054 
3055     jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3056     if (!gid_valid(sbi->gid))
3057         ip->i_gid = jfs_ip->saved_gid;
3058     else {
3059         ip->i_gid = sbi->gid;
3060     }
3061 
3062     ip->i_size = le64_to_cpu(dip->di_size);
3063     ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
3064     ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
3065     ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
3066     ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3067     ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
3068     ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
3069     ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3070     ip->i_generation = le32_to_cpu(dip->di_gen);
3071 
3072     jfs_ip->ixpxd = dip->di_ixpxd;  /* in-memory pxd's are little-endian */
3073     jfs_ip->acl = dip->di_acl;  /* as are dxd's */
3074     jfs_ip->ea = dip->di_ea;
3075     jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
3076     jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
3077     jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
3078 
3079     if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
3080         jfs_ip->dev = le32_to_cpu(dip->di_rdev);
3081         ip->i_rdev = new_decode_dev(jfs_ip->dev);
3082     }
3083 
3084     if (S_ISDIR(ip->i_mode)) {
3085         memcpy(&jfs_ip->u.dir, &dip->u._dir, 384);
3086     } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
3087         memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
3088     } else
3089         memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
3090 
3091     /* Zero the in-memory-only stuff */
3092     jfs_ip->cflag = 0;
3093     jfs_ip->btindex = 0;
3094     jfs_ip->btorder = 0;
3095     jfs_ip->bxflag = 0;
3096     jfs_ip->blid = 0;
3097     jfs_ip->atlhead = 0;
3098     jfs_ip->atltail = 0;
3099     jfs_ip->xtlid = 0;
3100     return (0);
3101 }
3102 
3103 /*
3104  * NAME:    copy_to_dinode()
3105  *
3106  * FUNCTION:    Copies inode info from in-memory inode to disk inode
3107  */
3108 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3109 {
3110     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3111     struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3112 
3113     dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
3114     dip->di_inostamp = cpu_to_le32(sbi->inostamp);
3115     dip->di_number = cpu_to_le32(ip->i_ino);
3116     dip->di_gen = cpu_to_le32(ip->i_generation);
3117     dip->di_size = cpu_to_le64(ip->i_size);
3118     dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3119     dip->di_nlink = cpu_to_le32(ip->i_nlink);
3120     if (!uid_valid(sbi->uid))
3121         dip->di_uid = cpu_to_le32(i_uid_read(ip));
3122     else
3123         dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3124                            jfs_ip->saved_uid));
3125     if (!gid_valid(sbi->gid))
3126         dip->di_gid = cpu_to_le32(i_gid_read(ip));
3127     else
3128         dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3129                             jfs_ip->saved_gid));
3130     /*
3131      * mode2 is only needed for storing the higher order bits.
3132      * Trust i_mode for the lower order ones
3133      */
3134     if (sbi->umask == -1)
3135         dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
3136                        ip->i_mode);
3137     else /* Leave the original permissions alone */
3138         dip->di_mode = cpu_to_le32(jfs_ip->mode2);
3139 
3140     dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
3141     dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
3142     dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
3143     dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
3144     dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
3145     dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
3146     dip->di_ixpxd = jfs_ip->ixpxd;  /* in-memory pxd's are little-endian */
3147     dip->di_acl = jfs_ip->acl;  /* as are dxd's */
3148     dip->di_ea = jfs_ip->ea;
3149     dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
3150     dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
3151     dip->di_otime.tv_nsec = 0;
3152     dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
3153     if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3154         dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3155 }