Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *   Copyright (C) International Business Machines Corp., 2000-2004
0004  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
0005  */
0006 
0007 /*
0008  *  jfs_logmgr.c: log manager
0009  *
0010  * for related information, see transaction manager (jfs_txnmgr.c), and
0011  * recovery manager (jfs_logredo.c).
0012  *
0013  * note: for detail, RTFS.
0014  *
0015  *  log buffer manager:
0016  * special purpose buffer manager supporting log i/o requirements.
0017  * per log serial pageout of logpage
0018  * queuing i/o requests and redrive i/o at iodone
0019  * maintain current logpage buffer
0020  * no caching since append only
0021  * appropriate jfs buffer cache buffers as needed
0022  *
0023  *  group commit:
0024  * transactions which wrote COMMIT records in the same in-memory
0025  * log page during the pageout of previous/current log page(s) are
0026  * committed together by the pageout of the page.
0027  *
0028  *  TBD lazy commit:
0029  * transactions are committed asynchronously when the log page
0030  * containing its COMMIT is paged out when it becomes full;
0031  *
0032  *  serialization:
0033  * . a per log lock serialize log write.
0034  * . a per log lock serialize group commit.
0035  * . a per log lock serialize log open/close;
0036  *
0037  *  TBD log integrity:
0038  * careful-write (ping-pong) of last logpage to recover from crash
0039  * in overwrite.
0040  * detection of split (out-of-order) write of physical sectors
0041  * of last logpage via timestamp at end of each sector
0042  * with its mirror data array at the trailer.
0043  *
0044  *  alternatives:
0045  * lsn - 64-bit monotonically increasing integer vs
0046  * 32-bit lspn and page eor.
0047  */
0048 
0049 #include <linux/fs.h>
0050 #include <linux/blkdev.h>
0051 #include <linux/interrupt.h>
0052 #include <linux/completion.h>
0053 #include <linux/kthread.h>
0054 #include <linux/buffer_head.h>      /* for sync_blockdev() */
0055 #include <linux/bio.h>
0056 #include <linux/freezer.h>
0057 #include <linux/export.h>
0058 #include <linux/delay.h>
0059 #include <linux/mutex.h>
0060 #include <linux/seq_file.h>
0061 #include <linux/slab.h>
0062 #include "jfs_incore.h"
0063 #include "jfs_filsys.h"
0064 #include "jfs_metapage.h"
0065 #include "jfs_superblock.h"
0066 #include "jfs_txnmgr.h"
0067 #include "jfs_debug.h"
0068 
0069 
/*
 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
 */
static struct lbuf *log_redrive_list;
static DEFINE_SPINLOCK(log_redrive_lock);


/*
 *	log read/write serialization (per log)
 */
#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))


/*
 *	log group commit serialization (per log)
 *
 * gclock is also taken with spin_lock_irqsave() from lmPostGC(), so the
 * process-context macros below use the _irq variants.
 */

#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)

/*
 *	log sync serialization (per log)
 */
/* LOGSYNC_DELTA: bytes that may be written past a sync point before the
 * next syncpt is triggered (used by lmLogSync() to compute log->nextsync).
 */
#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
/*
#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
*/


/*
 *	log buffer cache synchronization
 */
static DEFINE_SPINLOCK(jfsLCacheLock);

#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)

/*
 * See __SLEEP_COND in jfs_locks.h
 */
#define LCACHE_SLEEP_COND(wq, cond, flags)	\
do {						\
	if (cond)				\
		break;				\
	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
} while (0)

#define	LCACHE_WAKEUP(event)	wake_up(event)


/*
 *	lbuf buffer cache (lCache) control
 */
/* log buffer manager pageout control (cumulative, inclusive) */
#define	lbmREAD		0x0001
#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
				 * init pageout if at head of queue;
				 */
#define	lbmRELEASE	0x0004	/* remove from write queue
				 * at completion of pageout;
				 * do not free/recycle it yet:
				 * caller will free it;
				 */
#define	lbmSYNC		0x0008	/* do not return to freelist
				 * when removed from write queue;
				 */
#define lbmFREE		0x0010	/* return to freelist
				 * at completion of pageout;
				 * the buffer may be recycled;
				 */
#define	lbmDONE		0x0020
#define	lbmERROR	0x0040	/* pageout failed; propagated to tblkGC_ERROR */
#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
				 * of log page
				 */
#define lbmDIRECT	0x0100

/*
 * Global list of active external journals
 */
static LIST_HEAD(jfs_external_logs);
static struct jfs_log *dummy_log;
static DEFINE_MUTEX(jfs_log_mutex);
0159 
/*
 * forward references
 */
static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
			 struct lrd * lrd, struct tlock * tlck);

static int lmNextPage(struct jfs_log * log);
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate);

static int open_inline_log(struct super_block *sb);
static int open_dummy_log(struct super_block *sb);
static int lbmLogInit(struct jfs_log * log);
static void lbmLogShutdown(struct jfs_log * log);
static struct lbuf *lbmAllocate(struct jfs_log * log, int);
static void lbmFree(struct lbuf * bp);
static void lbmfree(struct lbuf * bp);
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
static int lbmIOWait(struct lbuf * bp, int flag);
static bio_end_io_t lbmIODone;
static void lbmStartIO(struct lbuf * bp);
static void lmGCwrite(struct jfs_log * log, int cant_block);
static int lmLogSync(struct jfs_log * log, int hard_sync);



/*
 *	statistics
 *
 * Counters bumped via INCREMENT(); compiled in only when
 * CONFIG_JFS_STATISTICS is set.
 */
#ifdef CONFIG_JFS_STATISTICS
static struct lmStat {
	uint commit;		/* # of commit */
	uint pagedone;		/* # of page written */
	uint submitted;		/* # of pages submitted */
	uint full_page;		/* # of full pages submitted */
	uint partial_page;	/* # of partial pages submitted */
} lmStat;
#endif
0200 
0201 static void write_special_inodes(struct jfs_log *log,
0202                  int (*writer)(struct address_space *))
0203 {
0204     struct jfs_sb_info *sbi;
0205 
0206     list_for_each_entry(sbi, &log->sb_list, log_list) {
0207         writer(sbi->ipbmap->i_mapping);
0208         writer(sbi->ipimap->i_mapping);
0209         writer(sbi->direct_inode->i_mapping);
0210     }
0211 }
0212 
/*
 * NAME:	lmLog()
 *
 * FUNCTION:	write a log record;
 *
 * PARAMETER:	log	- log to write to
 *		tblk	- transaction block (NULL for out-of-transaction log)
 *		lrd	- log record descriptor to move to the log page
 *		tlck	- tlock of the logged metapage (may be NULL)
 *
 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 *		-1  - error;
 *
 * note: todo: log error handler
 */
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	  struct tlock * tlck)
{
	int lsn;
	int diffp, difft;
	struct metapage *mp = NULL;
	unsigned long flags;

	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
		 log, tblk, lrd, tlck);

	LOG_LOCK(log);

	/* log by (out-of-transaction) JFS ? */
	if (tblk == NULL)
		goto writeRecord;

	/* log from page ?  (skip lsn bookkeeping for btree-root tlocks
	 * and tlocks with no metapage attached)
	 */
	if (tlck == NULL ||
	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
		goto writeRecord;

	/*
	 *	initialize/update page/transaction recovery lsn
	 */
	lsn = log->lsn;

	LOGSYNC_LOCK(log, flags);

	/*
	 * initialize page lsn if first log write of the page
	 */
	if (mp->lsn == 0) {
		mp->log = log;
		mp->lsn = lsn;
		log->count++;

		/* insert page at tail of logsynclist */
		list_add_tail(&mp->synclist, &log->synclist);
	}

	/*
	 *	initialize/update lsn of tblock of the page
	 *
	 * transaction inherits oldest lsn of pages associated
	 * with allocation/deallocation of resources (their
	 * log records are used to reconstruct allocation map
	 * at recovery time: inode for inode allocation map,
	 * B+-tree index of extent descriptors for block
	 * allocation map);
	 * allocation map pages inherit transaction lsn at
	 * commit time to allow forwarding log syncpt past log
	 * records associated with allocation/deallocation of
	 * resources only after persistent map of these map pages
	 * have been updated and propagated to home.
	 */
	/*
	 * initialize transaction lsn:
	 */
	if (tblk->lsn == 0) {
		/* inherit lsn of its first page logged */
		tblk->lsn = mp->lsn;
		log->count++;

		/* insert tblock after the page on logsynclist */
		list_add(&tblk->synclist, &mp->synclist);
	}
	/*
	 * update transaction lsn:
	 */
	else {
		/* inherit oldest/smallest lsn of page */
		logdiff(diffp, mp->lsn, log);
		logdiff(difft, tblk->lsn, log);
		if (diffp < difft) {
			/* update tblock lsn with page lsn */
			tblk->lsn = mp->lsn;

			/* move tblock after page on logsynclist */
			list_move(&tblk->synclist, &mp->synclist);
		}
	}

	LOGSYNC_UNLOCK(log, flags);

	/*
	 *	write the log record
	 */
      writeRecord:
	lsn = lmWriteRecord(log, tblk, lrd, tlck);

	/*
	 * forward log syncpt if log reached next syncpt trigger
	 */
	logdiff(diffp, lsn, log);
	if (diffp >= log->nextsync)
		lsn = lmLogSync(log, 0);

	/* update end-of-log lsn */
	log->lsn = lsn;

	LOG_UNLOCK(log);

	/* return end-of-log address */
	return lsn;
}
0331 
/*
 * NAME:	lmWriteRecord()
 *
 * FUNCTION:	move the log record to current log page
 *
 * PARAMETER:	log	- log to write to
 *		tblk	- transaction block (NULL for SYNCPT etc.)
 *		lrd	- log record descriptor (length filled in here)
 *		tlck	- tlock whose line-locked data is logged (may be NULL)
 *
 * RETURN:	end-of-log address
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	      struct tlock * tlck)
{
	int lsn = 0;		/* end-of-log address */
	struct lbuf *bp;	/* dst log page buffer */
	struct logpage *lp;	/* dst log page */
	caddr_t dst;		/* destination address in log page */
	int dstoffset;		/* end-of-log offset in log page */
	int freespace;		/* free space in log page */
	caddr_t p;		/* src meta-data page */
	caddr_t src;
	int srclen;
	int nbytes;		/* number of bytes to move */
	int i;
	int len;		/* running total of logged data bytes */
	struct linelock *linelock;
	struct lv *lv;
	struct lvd *lvd;
	int l2linesize;

	len = 0;

	/* retrieve destination log page to write */
	bp = (struct lbuf *) log->bp;
	lp = (struct logpage *) bp->l_ldata;
	dstoffset = log->eor;

	/* any log data to write ? */
	if (tlck == NULL)
		goto moveLrd;

	/*
	 *	move log record data
	 */
	/* retrieve source meta-data page to log */
	if (tlck->flag & tlckPAGELOCK) {
		p = (caddr_t) (tlck->mp->data);
		linelock = (struct linelock *) & tlck->lock;
	}
	/* retrieve source in-memory inode to log */
	else if (tlck->flag & tlckINODELOCK) {
		if (tlck->type & tlckDTREE)
			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
		else
			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
		linelock = (struct linelock *) & tlck->lock;
	}
	else {
		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
		return 0;	/* Probably should trap */
	}
	l2linesize = linelock->l2linesize;

      moveData:
	ASSERT(linelock->index <= linelock->maxcnt);

	/* copy each non-empty log vector of this linelock, spilling onto
	 * new log pages as the current page fills up
	 */
	lv = linelock->lv;
	for (i = 0; i < linelock->index; i++, lv++) {
		if (lv->length == 0)
			continue;

		/* is page full ? */
		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
			/* page become full: move on to next page */
			lmNextPage(log);

			bp = log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;
		}

		/*
		 * move log vector data
		 */
		src = (u8 *) p + (lv->offset << l2linesize);
		srclen = lv->length << l2linesize;
		len += srclen;
		while (srclen > 0) {
			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
			nbytes = min(freespace, srclen);
			dst = (caddr_t) lp + dstoffset;
			memcpy(dst, src, nbytes);
			dstoffset += nbytes;

			/* is page not full ? */
			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
				break;

			/* page become full: move on to next page */
			lmNextPage(log);

			bp = (struct lbuf *) log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;

			srclen -= nbytes;
			src += nbytes;
		}

		/*
		 * move log vector descriptor (4 bytes: offset + length)
		 */
		len += 4;
		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
		lvd->offset = cpu_to_le16(lv->offset);
		lvd->length = cpu_to_le16(lv->length);
		dstoffset += 4;
		jfs_info("lmWriteRecord: lv offset:%d length:%d",
			 lv->offset, lv->length);
	}

	/* follow the chain of linelocks, if any */
	if ((i = linelock->next)) {
		linelock = (struct linelock *) lid_to_tlock(i);
		goto moveData;
	}

	/*
	 *	move log record descriptor
	 */
      moveLrd:
	lrd->length = cpu_to_le16(len);

	src = (caddr_t) lrd;
	srclen = LOGRDSIZE;

	/* the descriptor itself may straddle a page boundary; loop until
	 * all of it has been copied
	 */
	while (srclen > 0) {
		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
		nbytes = min(freespace, srclen);
		dst = (caddr_t) lp + dstoffset;
		memcpy(dst, src, nbytes);

		dstoffset += nbytes;
		srclen -= nbytes;

		/* are there more to move than freespace of page ? */
		if (srclen)
			goto pageFull;

		/*
		 * end of log record descriptor
		 */

		/* update last log record eor */
		log->eor = dstoffset;
		bp->l_eor = dstoffset;
		lsn = (log->page << L2LOGPSIZE) + dstoffset;

		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
			tblk->clsn = lsn;
			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
				 bp->l_eor);

			INCREMENT(lmStat.commit);	/* # of commit */

			/*
			 * enqueue tblock for group commit:
			 *
			 * enqueue tblock of non-trivial/synchronous COMMIT
			 * at tail of group commit queue
			 * (trivial/asynchronous COMMITs are ignored by
			 * group commit.)
			 */
			LOGGC_LOCK(log);

			/* init tblock gc state */
			tblk->flag = tblkGC_QUEUE;
			tblk->bp = log->bp;
			tblk->pn = log->page;
			tblk->eor = log->eor;

			/* enqueue transaction to commit queue */
			list_add_tail(&tblk->cqueue, &log->cqueue);

			LOGGC_UNLOCK(log);
		}

		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

		/* page not full ? */
		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
			return lsn;

	      pageFull:
		/* page become full: move on to next page */
		lmNextPage(log);

		bp = (struct lbuf *) log->bp;
		lp = (struct logpage *) bp->l_ldata;
		dstoffset = LOGPHDRSIZE;
		src += nbytes;
	}

	return lsn;
}
0539 
0540 
/*
 * NAME:	lmNextPage()
 *
 * FUNCTION:	write current page and allocate next page.
 *
 * PARAMETER:	log
 *
 * RETURN:	0
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmNextPage(struct jfs_log * log)
{
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	int pn;			/* current page number */
	struct lbuf *bp;
	struct lbuf *nextbp;
	struct tblock *tblk;

	/* get current log page number and log sequence page number */
	pn = log->page;
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lspn = le32_to_cpu(lp->h.page);

	LOGGC_LOCK(log);

	/*
	 *	write or queue the full page at the tail of write queue
	 */
	/* get the tail tblk on commit queue */
	if (list_empty(&log->cqueue))
		tblk = NULL;
	else
		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);

	/* every tblk that has a COMMIT record on the current page,
	 * and has not been committed, must be on the commit queue
	 * since tblk is queued at the commit queue at the time
	 * of writing its COMMIT record on the page before
	 * page becomes full (even though the tblk thread
	 * who wrote COMMIT record may have been suspended
	 * currently);
	 */

	/* is page bound with outstanding tail tblk ? */
	if (tblk && tblk->pn == pn) {
		/* mark tblk for end-of-page */
		tblk->flag |= tblkGC_EOP;

		if (log->cflag & logGC_PAGEOUT) {
			/* if page is not already on write queue,
			 * just enqueue (no lbmWRITE to prevent redrive)
			 * buffer to wqueue to ensure correct serial order
			 * of the pages since log pages will be added
			 * continuously
			 */
			if (bp->l_wqnext == NULL)
				lbmWrite(log, bp, 0, 0);
		} else {
			/*
			 * No current GC leader, initiate group commit
			 */
			log->cflag |= logGC_PAGEOUT;
			lmGCwrite(log, 0);
		}
	}
	/* page is not bound with outstanding tblk:
	 * init write or mark it to be redriven (lbmWRITE)
	 */
	else {
		/* finalize the page */
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
	}
	LOGGC_UNLOCK(log);

	/*
	 *	allocate/initialize next page
	 */
	/* if log wraps, the first data page of log is 2
	 * (0 never used, 1 is superblock).
	 */
	log->page = (pn == log->size - 1) ? 2 : pn + 1;
	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */

	/* allocate/initialize next log page buffer */
	nextbp = lbmAllocate(log, log->page);
	nextbp->l_eor = log->eor;
	log->bp = nextbp;

	/* initialize next log page: header and trailer both carry the
	 * next log sequence page number and the empty-page eor
	 */
	lp = (struct logpage *) nextbp->l_ldata;
	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

	return 0;
}
0641 
0642 
/*
 * NAME:	lmGroupCommit()
 *
 * FUNCTION:	group commit
 *	initiate pageout of the pages with COMMIT in the order of
 *	page number - redrive pageout of the page at the head of
 *	pageout queue until full page has been written.
 *
 * RETURN:	0 on success; -EIO if the group commit failed
 *
 * NOTE:
 *	LOGGC_LOCK serializes log group commit queue, and
 *	transaction blocks on the commit queue.
 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{
	int rc = 0;

	LOGGC_LOCK(log);

	/* group committed already ? */
	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}
	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);

	if (tblk->xflag & COMMIT_LAZY)
		tblk->flag |= tblkGC_LAZY;

	/* become group leader: no pageout in progress, queued work exists,
	 * and the commit must be driven now (synchronous commit, a log
	 * flush is pending, or tlocks are running low)
	 */
	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
	     || jfs_tlocks_low)) {
		/*
		 * No pageout in progress
		 *
		 * start group commit as its group leader.
		 */
		log->cflag |= logGC_PAGEOUT;

		lmGCwrite(log, 0);
	}

	if (tblk->xflag & COMMIT_LAZY) {
		/*
		 * Lazy transactions can leave now
		 */
		LOGGC_UNLOCK(log);
		return 0;
	}

	/* lmGCwrite gives up LOGGC_LOCK, check again */

	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}

	/* upcount transaction waiting for completion
	 */
	log->gcrtc++;
	tblk->flag |= tblkGC_READY;

	/* sleep until lmPostGC() marks this tblk committed; gclock is
	 * dropped while sleeping and reacquired on wakeup
	 */
	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));

	/* removed from commit queue */
	if (tblk->flag & tblkGC_ERROR)
		rc = -EIO;

	LOGGC_UNLOCK(log);
	return rc;
}
0723 
/*
 * NAME:	lmGCwrite()
 *
 * FUNCTION:	group commit write
 *	initiate write of log page, building a group of all transactions
 *	with commit records on that page.
 *
 * PARAMETER:	log	   - log being committed
 *		cant_write - nonzero if we must not block (e.g. called from
 *			     iodone context via lmPostGC())
 *
 * RETURN:	None
 *
 * NOTE:
 *	LOGGC_LOCK must be held by caller.
 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
static void lmGCwrite(struct jfs_log * log, int cant_write)
{
	struct lbuf *bp;
	struct logpage *lp;
	int gcpn;		/* group commit page number */
	struct tblock *tblk;
	struct tblock *xtblk = NULL;

	/*
	 * build the commit group of a log page
	 *
	 * scan commit queue and make a commit group of all
	 * transactions with COMMIT records on the same log page.
	 */
	/* get the head tblk on the commit queue */
	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;

	list_for_each_entry(tblk, &log->cqueue, cqueue) {
		if (tblk->pn != gcpn)
			break;

		xtblk = tblk;

		/* state transition: (QUEUE, READY) -> COMMIT */
		tblk->flag |= tblkGC_COMMIT;
	}
	tblk = xtblk;		/* last tblk of the page */

	/*
	 * pageout to commit transactions on the log page.
	 */
	bp = (struct lbuf *) tblk->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* is page already full ? */
	if (tblk->flag & tblkGC_EOP) {
		/* mark page to free at end of group commit of the page */
		tblk->flag &= ~tblkGC_EOP;
		tblk->flag |= tblkGC_FREE;
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
			 cant_write);
		INCREMENT(lmStat.full_page);
	}
	/* page is not yet full */
	else {
		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
		INCREMENT(lmStat.partial_page);
	}
}
0789 
/*
 * NAME:	lmPostGC()
 *
 * FUNCTION:	group commit post-processing
 *	Processes transactions after their commit records have been written
 *	to disk, redriving log I/O if necessary.
 *
 * RETURN:	None
 *
 * NOTE:
 *	This routine is called at interrupt time by lbmIODone, hence the
 *	irqsave locking instead of the LOGGC_LOCK/LOGGC_UNLOCK macros.
 */
static void lmPostGC(struct lbuf * bp)
{
	unsigned long flags;
	struct jfs_log *log = bp->l_log;
	struct logpage *lp;
	struct tblock *tblk, *temp;

	//LOGGC_LOCK(log);
	spin_lock_irqsave(&log->gclock, flags);
	/*
	 * current pageout of group commit completed.
	 *
	 * remove/wakeup transactions from commit queue who were
	 * group committed with the current log page
	 */
	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
		if (!(tblk->flag & tblkGC_COMMIT))
			break;
		/* if transaction was marked GC_COMMIT then
		 * it has been shipped in the current pageout
		 * and made it to disk - it is committed.
		 */

		if (bp->l_flag & lbmERROR)
			tblk->flag |= tblkGC_ERROR;

		/* remove it from the commit queue */
		list_del(&tblk->cqueue);
		tblk->flag &= ~tblkGC_QUEUE;

		if (tblk == log->flush_tblk) {
			/* we can stop flushing the log now */
			clear_bit(log_FLUSH, &log->flag);
			log->flush_tblk = NULL;
		}

		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
			 tblk->flag);

		if (!(tblk->xflag & COMMIT_FORCE))
			/*
			 * Hand tblk over to lazy commit thread
			 */
			txLazyUnlock(tblk);
		else {
			/* state transition: COMMIT -> COMMITTED */
			tblk->flag |= tblkGC_COMMITTED;

			/* only waiters (tblkGC_READY) were counted in gcrtc */
			if (tblk->flag & tblkGC_READY)
				log->gcrtc--;

			LOGGC_WAKEUP(tblk);
		}

		/* was page full before pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		if (tblk->flag & tblkGC_FREE)
			lbmFree(bp);
		/* did page become full after pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		else if (tblk->flag & tblkGC_EOP) {
			/* finalize the page */
			lp = (struct logpage *) bp->l_ldata;
			bp->l_ceor = bp->l_eor;
			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
			jfs_info("lmPostGC: calling lbmWrite");
			/* cant_block = 1: we are in iodone context */
			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
				 1);
		}

	}

	/* are there any transactions who have entered lmGroupCommit()
	 * (whose COMMITs are after that of the last log page written.
	 * They are waiting for new group commit (above at (SLEEP 1))
	 * or lazy transactions are on a full (queued) log page,
	 * select the latest ready transaction as new group leader and
	 * wake her up to lead her group.
	 *
	 * NOTE(review): tblk here is the cursor left by the loop above;
	 * if the loop consumed every entry the queue is empty and the
	 * list_empty() test short-circuits before tblk->bp is read —
	 * confirm this invariant holds for all callers.
	 */
	if ((!list_empty(&log->cqueue)) &&
	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
		/*
		 * Call lmGCwrite with new group leader
		 */
		lmGCwrite(log, 1);

	/* no transaction are ready yet (transactions are only just
	 * queued (GC_QUEUE) and not entered for group commit yet).
	 * the first transaction entering group commit
	 * will elect herself as new group leader.
	 */
	else
		log->cflag &= ~logGC_PAGEOUT;

	//LOGGC_UNLOCK(log);
	spin_unlock_irqrestore(&log->gclock, flags);
	return;
}
0903 
0904 /*
0905  * NAME:    lmLogSync()
0906  *
0907  * FUNCTION:    write log SYNCPT record for specified log
0908  *  if new sync address is available
0909  *  (normally the case if sync() is executed by back-ground
0910  *  process).
0911  *  calculate new value of i_nextsync which determines when
0912  *  this code is called again.
0913  *
0914  * PARAMETERS:  log - log structure
0915  *      hard_sync - 1 to force all metadata to be written
0916  *
0917  * RETURN:  0
0918  *
0919  * serialization: LOG_LOCK() held on entry/exit
0920  */
0921 static int lmLogSync(struct jfs_log * log, int hard_sync)
0922 {
0923     int logsize;
0924     int written;        /* written since last syncpt */
0925     int free;       /* free space left available */
0926     int delta;      /* additional delta to write normally */
0927     int more;       /* additional write granted */
0928     struct lrd lrd;
0929     int lsn;
0930     struct logsyncblk *lp;
0931     unsigned long flags;
0932 
0933     /* push dirty metapages out to disk */
0934     if (hard_sync)
0935         write_special_inodes(log, filemap_fdatawrite);
0936     else
0937         write_special_inodes(log, filemap_flush);
0938 
0939     /*
0940      *  forward syncpt
0941      */
0942     /* if last sync is same as last syncpt,
0943      * invoke sync point forward processing to update sync.
0944      */
0945 
0946     if (log->sync == log->syncpt) {
0947         LOGSYNC_LOCK(log, flags);
0948         if (list_empty(&log->synclist))
0949             log->sync = log->lsn;
0950         else {
0951             lp = list_entry(log->synclist.next,
0952                     struct logsyncblk, synclist);
0953             log->sync = lp->lsn;
0954         }
0955         LOGSYNC_UNLOCK(log, flags);
0956 
0957     }
0958 
0959     /* if sync is different from last syncpt,
0960      * write a SYNCPT record with syncpt = sync.
0961      * reset syncpt = sync
0962      */
0963     if (log->sync != log->syncpt) {
0964         lrd.logtid = 0;
0965         lrd.backchain = 0;
0966         lrd.type = cpu_to_le16(LOG_SYNCPT);
0967         lrd.length = 0;
0968         lrd.log.syncpt.sync = cpu_to_le32(log->sync);
0969         lsn = lmWriteRecord(log, NULL, &lrd, NULL);
0970 
0971         log->syncpt = log->sync;
0972     } else
0973         lsn = log->lsn;
0974 
0975     /*
0976      *  setup next syncpt trigger (SWAG)
0977      */
0978     logsize = log->logsize;
0979 
0980     logdiff(written, lsn, log);
0981     free = logsize - written;
0982     delta = LOGSYNC_DELTA(logsize);
0983     more = min(free / 2, delta);
0984     if (more < 2 * LOGPSIZE) {
0985         jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
0986         /*
0987          *  log wrapping
0988          *
0989          * option 1 - panic ? No.!
0990          * option 2 - shutdown file systems
0991          *        associated with log ?
0992          * option 3 - extend log ?
0993          * option 4 - second chance
0994          *
0995          * mark log wrapped, and continue.
0996          * when all active transactions are completed,
0997          * mark log valid for recovery.
0998          * if crashed during invalid state, log state
0999          * implies invalid log, forcing fsck().
1000          */
1001         /* mark log state log wrap in log superblock */
1002         /* log->state = LOGWRAP; */
1003 
1004         /* reset sync point computation */
1005         log->syncpt = log->sync = lsn;
1006         log->nextsync = delta;
1007     } else
1008         /* next syncpt trigger = written + more */
1009         log->nextsync = written + more;
1010 
1011     /* if number of bytes written from last sync point is more
1012      * than 1/4 of the log size, stop new transactions from
1013      * starting until all current transactions are completed
1014      * by setting syncbarrier flag.
1015      */
1016     if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1017         (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1018         set_bit(log_SYNCBARRIER, &log->flag);
1019         jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1020              log->syncpt);
1021         /*
1022          * We may have to initiate group commit
1023          */
1024         jfs_flush_journal(log, 0);
1025     }
1026 
1027     return lsn;
1028 }
1029 
1030 /*
1031  * NAME:    jfs_syncpt
1032  *
1033  * FUNCTION:    write log SYNCPT record for specified log
1034  *
1035  * PARAMETERS:  log   - log structure
1036  *      hard_sync - set to 1 to force metadata to be written
1037  */
1038 void jfs_syncpt(struct jfs_log *log, int hard_sync)
1039 {   LOG_LOCK(log);
1040     if (!test_bit(log_QUIESCE, &log->flag))
1041         lmLogSync(log, hard_sync);
1042     LOG_UNLOCK(log);
1043 }
1044 
1045 /*
1046  * NAME:    lmLogOpen()
1047  *
1048  * FUNCTION:    open the log on first open;
1049  *  insert filesystem in the active list of the log.
1050  *
1051  * PARAMETER:   ipmnt   - file system mount inode
1052  *      iplog   - log inode (out)
1053  *
1054  * RETURN:
1055  *
1056  * serialization:
1057  */
1058 int lmLogOpen(struct super_block *sb)
1059 {
1060     int rc;
1061     struct block_device *bdev;
1062     struct jfs_log *log;
1063     struct jfs_sb_info *sbi = JFS_SBI(sb);
1064 
1065     if (sbi->flag & JFS_NOINTEGRITY)
1066         return open_dummy_log(sb);
1067 
1068     if (sbi->mntflag & JFS_INLINELOG)
1069         return open_inline_log(sb);
1070 
1071     mutex_lock(&jfs_log_mutex);
1072     list_for_each_entry(log, &jfs_external_logs, journal_list) {
1073         if (log->bdev->bd_dev == sbi->logdev) {
1074             if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1075                 jfs_warn("wrong uuid on JFS journal");
1076                 mutex_unlock(&jfs_log_mutex);
1077                 return -EINVAL;
1078             }
1079             /*
1080              * add file system to log active file system list
1081              */
1082             if ((rc = lmLogFileSystem(log, sbi, 1))) {
1083                 mutex_unlock(&jfs_log_mutex);
1084                 return rc;
1085             }
1086             goto journal_found;
1087         }
1088     }
1089 
1090     if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1091         mutex_unlock(&jfs_log_mutex);
1092         return -ENOMEM;
1093     }
1094     INIT_LIST_HEAD(&log->sb_list);
1095     init_waitqueue_head(&log->syncwait);
1096 
1097     /*
1098      *  external log as separate logical volume
1099      *
1100      * file systems to log may have n-to-1 relationship;
1101      */
1102 
1103     bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1104                  log);
1105     if (IS_ERR(bdev)) {
1106         rc = PTR_ERR(bdev);
1107         goto free;
1108     }
1109 
1110     log->bdev = bdev;
1111     uuid_copy(&log->uuid, &sbi->loguuid);
1112 
1113     /*
1114      * initialize log:
1115      */
1116     if ((rc = lmLogInit(log)))
1117         goto close;
1118 
1119     list_add(&log->journal_list, &jfs_external_logs);
1120 
1121     /*
1122      * add file system to log active file system list
1123      */
1124     if ((rc = lmLogFileSystem(log, sbi, 1)))
1125         goto shutdown;
1126 
1127 journal_found:
1128     LOG_LOCK(log);
1129     list_add(&sbi->log_list, &log->sb_list);
1130     sbi->log = log;
1131     LOG_UNLOCK(log);
1132 
1133     mutex_unlock(&jfs_log_mutex);
1134     return 0;
1135 
1136     /*
1137      *  unwind on error
1138      */
1139       shutdown:     /* unwind lbmLogInit() */
1140     list_del(&log->journal_list);
1141     lbmLogShutdown(log);
1142 
1143       close:        /* close external log device */
1144     blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1145 
1146       free:     /* free log descriptor */
1147     mutex_unlock(&jfs_log_mutex);
1148     kfree(log);
1149 
1150     jfs_warn("lmLogOpen: exit(%d)", rc);
1151     return rc;
1152 }
1153 
1154 static int open_inline_log(struct super_block *sb)
1155 {
1156     struct jfs_log *log;
1157     int rc;
1158 
1159     if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1160         return -ENOMEM;
1161     INIT_LIST_HEAD(&log->sb_list);
1162     init_waitqueue_head(&log->syncwait);
1163 
1164     set_bit(log_INLINELOG, &log->flag);
1165     log->bdev = sb->s_bdev;
1166     log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1167     log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1168         (L2LOGPSIZE - sb->s_blocksize_bits);
1169     log->l2bsize = sb->s_blocksize_bits;
1170     ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1171 
1172     /*
1173      * initialize log.
1174      */
1175     if ((rc = lmLogInit(log))) {
1176         kfree(log);
1177         jfs_warn("lmLogOpen: exit(%d)", rc);
1178         return rc;
1179     }
1180 
1181     list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1182     JFS_SBI(sb)->log = log;
1183 
1184     return rc;
1185 }
1186 
/*
 * open_dummy_log()
 *
 * attach a no-integrity (JFS_NOINTEGRITY) mount to the shared
 * in-memory dummy log, creating it on first use.
 */
static int open_dummy_log(struct super_block *sb)
{
    int rc;

    mutex_lock(&jfs_log_mutex);
    /* the dummy log is a lazily-created singleton shared by all
     * no-integrity mounts
     */
    if (!dummy_log) {
        dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
        if (!dummy_log) {
            mutex_unlock(&jfs_log_mutex);
            return -ENOMEM;
        }
        INIT_LIST_HEAD(&dummy_log->sb_list);
        init_waitqueue_head(&dummy_log->syncwait);
        dummy_log->no_integrity = 1;
        /* Make up some stuff */
        dummy_log->base = 0;
        dummy_log->size = 1024;
        rc = lmLogInit(dummy_log);
        if (rc) {
            kfree(dummy_log);
            dummy_log = NULL;
            mutex_unlock(&jfs_log_mutex);
            return rc;
        }
    }

    /* attach this superblock to the shared dummy log */
    LOG_LOCK(dummy_log);
    list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
    JFS_SBI(sb)->log = dummy_log;
    LOG_UNLOCK(dummy_log);
    mutex_unlock(&jfs_log_mutex);

    return 0;
}
1221 
1222 /*
1223  * NAME:    lmLogInit()
1224  *
1225  * FUNCTION:    log initialization at first log open.
1226  *
1227  *  logredo() (or logformat()) should have been run previously.
1228  *  initialize the log from log superblock.
1229  *  set the log state in the superblock to LOGMOUNT and
1230  *  write SYNCPT log record.
1231  *
1232  * PARAMETER:   log - log structure
1233  *
1234  * RETURN:  0   - if ok
1235  *      -EINVAL - bad log magic number or superblock dirty
1236  *      error returned from logwait()
1237  *
1238  * serialization: single first open thread
1239  */
1240 int lmLogInit(struct jfs_log * log)
1241 {
1242     int rc = 0;
1243     struct lrd lrd;
1244     struct logsuper *logsuper;
1245     struct lbuf *bpsuper;
1246     struct lbuf *bp;
1247     struct logpage *lp;
1248     int lsn = 0;
1249 
1250     jfs_info("lmLogInit: log:0x%p", log);
1251 
1252     /* initialize the group commit serialization lock */
1253     LOGGC_LOCK_INIT(log);
1254 
1255     /* allocate/initialize the log write serialization lock */
1256     LOG_LOCK_INIT(log);
1257 
1258     LOGSYNC_LOCK_INIT(log);
1259 
1260     INIT_LIST_HEAD(&log->synclist);
1261 
1262     INIT_LIST_HEAD(&log->cqueue);
1263     log->flush_tblk = NULL;
1264 
1265     log->count = 0;
1266 
1267     /*
1268      * initialize log i/o
1269      */
1270     if ((rc = lbmLogInit(log)))
1271         return rc;
1272 
1273     if (!test_bit(log_INLINELOG, &log->flag))
1274         log->l2bsize = L2LOGPSIZE;
1275 
1276     /* check for disabled journaling to disk */
1277     if (log->no_integrity) {
1278         /*
1279          * Journal pages will still be filled.  When the time comes
1280          * to actually do the I/O, the write is not done, and the
1281          * endio routine is called directly.
1282          */
1283         bp = lbmAllocate(log , 0);
1284         log->bp = bp;
1285         bp->l_pn = bp->l_eor = 0;
1286     } else {
1287         /*
1288          * validate log superblock
1289          */
1290         if ((rc = lbmRead(log, 1, &bpsuper)))
1291             goto errout10;
1292 
1293         logsuper = (struct logsuper *) bpsuper->l_ldata;
1294 
1295         if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1296             jfs_warn("*** Log Format Error ! ***");
1297             rc = -EINVAL;
1298             goto errout20;
1299         }
1300 
1301         /* logredo() should have been run successfully. */
1302         if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1303             jfs_warn("*** Log Is Dirty ! ***");
1304             rc = -EINVAL;
1305             goto errout20;
1306         }
1307 
1308         /* initialize log from log superblock */
1309         if (test_bit(log_INLINELOG,&log->flag)) {
1310             if (log->size != le32_to_cpu(logsuper->size)) {
1311                 rc = -EINVAL;
1312                 goto errout20;
1313             }
1314             jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1315                  log, (unsigned long long)log->base, log->size);
1316         } else {
1317             if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1318                 jfs_warn("wrong uuid on JFS log device");
1319                 rc = -EINVAL;
1320                 goto errout20;
1321             }
1322             log->size = le32_to_cpu(logsuper->size);
1323             log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1324             jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1325                  log, (unsigned long long)log->base, log->size);
1326         }
1327 
1328         log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1329         log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1330 
1331         /*
1332          * initialize for log append write mode
1333          */
1334         /* establish current/end-of-log page/buffer */
1335         if ((rc = lbmRead(log, log->page, &bp)))
1336             goto errout20;
1337 
1338         lp = (struct logpage *) bp->l_ldata;
1339 
1340         jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1341              le32_to_cpu(logsuper->end), log->page, log->eor,
1342              le16_to_cpu(lp->h.eor));
1343 
1344         log->bp = bp;
1345         bp->l_pn = log->page;
1346         bp->l_eor = log->eor;
1347 
1348         /* if current page is full, move on to next page */
1349         if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1350             lmNextPage(log);
1351 
1352         /*
1353          * initialize log syncpoint
1354          */
1355         /*
1356          * write the first SYNCPT record with syncpoint = 0
1357          * (i.e., log redo up to HERE !);
1358          * remove current page from lbm write queue at end of pageout
1359          * (to write log superblock update), but do not release to
1360          * freelist;
1361          */
1362         lrd.logtid = 0;
1363         lrd.backchain = 0;
1364         lrd.type = cpu_to_le16(LOG_SYNCPT);
1365         lrd.length = 0;
1366         lrd.log.syncpt.sync = 0;
1367         lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1368         bp = log->bp;
1369         bp->l_ceor = bp->l_eor;
1370         lp = (struct logpage *) bp->l_ldata;
1371         lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1372         lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1373         if ((rc = lbmIOWait(bp, 0)))
1374             goto errout30;
1375 
1376         /*
1377          * update/write superblock
1378          */
1379         logsuper->state = cpu_to_le32(LOGMOUNT);
1380         log->serial = le32_to_cpu(logsuper->serial) + 1;
1381         logsuper->serial = cpu_to_le32(log->serial);
1382         lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1383         if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1384             goto errout30;
1385     }
1386 
1387     /* initialize logsync parameters */
1388     log->logsize = (log->size - 2) << L2LOGPSIZE;
1389     log->lsn = lsn;
1390     log->syncpt = lsn;
1391     log->sync = log->syncpt;
1392     log->nextsync = LOGSYNC_DELTA(log->logsize);
1393 
1394     jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1395          log->lsn, log->syncpt, log->sync);
1396 
1397     /*
1398      * initialize for lazy/group commit
1399      */
1400     log->clsn = lsn;
1401 
1402     return 0;
1403 
1404     /*
1405      *  unwind on error
1406      */
1407       errout30:     /* release log page */
1408     log->wqueue = NULL;
1409     bp->l_wqnext = NULL;
1410     lbmFree(bp);
1411 
1412       errout20:     /* release log superblock */
1413     lbmFree(bpsuper);
1414 
1415       errout10:     /* unwind lbmLogInit() */
1416     lbmLogShutdown(log);
1417 
1418     jfs_warn("lmLogInit: exit(%d)", rc);
1419     return rc;
1420 }
1421 
1422 
1423 /*
1424  * NAME:    lmLogClose()
1425  *
1426  * FUNCTION:    remove file system <ipmnt> from active list of log <iplog>
1427  *      and close it on last close.
1428  *
1429  * PARAMETER:   sb  - superblock
1430  *
1431  * RETURN:  errors from subroutines
1432  *
1433  * serialization:
1434  */
1435 int lmLogClose(struct super_block *sb)
1436 {
1437     struct jfs_sb_info *sbi = JFS_SBI(sb);
1438     struct jfs_log *log = sbi->log;
1439     struct block_device *bdev;
1440     int rc = 0;
1441 
1442     jfs_info("lmLogClose: log:0x%p", log);
1443 
1444     mutex_lock(&jfs_log_mutex);
1445     LOG_LOCK(log);
1446     list_del(&sbi->log_list);
1447     LOG_UNLOCK(log);
1448     sbi->log = NULL;
1449 
1450     /*
1451      * We need to make sure all of the "written" metapages
1452      * actually make it to disk
1453      */
1454     sync_blockdev(sb->s_bdev);
1455 
1456     if (test_bit(log_INLINELOG, &log->flag)) {
1457         /*
1458          *  in-line log in host file system
1459          */
1460         rc = lmLogShutdown(log);
1461         kfree(log);
1462         goto out;
1463     }
1464 
1465     if (!log->no_integrity)
1466         lmLogFileSystem(log, sbi, 0);
1467 
1468     if (!list_empty(&log->sb_list))
1469         goto out;
1470 
1471     /*
1472      * TODO: ensure that the dummy_log is in a state to allow
1473      * lbmLogShutdown to deallocate all the buffers and call
1474      * kfree against dummy_log.  For now, leave dummy_log & its
1475      * buffers in memory, and resuse if another no-integrity mount
1476      * is requested.
1477      */
1478     if (log->no_integrity)
1479         goto out;
1480 
1481     /*
1482      *  external log as separate logical volume
1483      */
1484     list_del(&log->journal_list);
1485     bdev = log->bdev;
1486     rc = lmLogShutdown(log);
1487 
1488     blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1489 
1490     kfree(log);
1491 
1492       out:
1493     mutex_unlock(&jfs_log_mutex);
1494     jfs_info("lmLogClose: exit(%d)", rc);
1495     return rc;
1496 }
1497 
1498 
1499 /*
1500  * NAME:    jfs_flush_journal()
1501  *
1502  * FUNCTION:    initiate write of any outstanding transactions to the journal
1503  *      and optionally wait until they are all written to disk
1504  *
1505  *      wait == 0  flush until latest txn is committed, don't wait
1506  *      wait == 1  flush until latest txn is committed, wait
1507  *      wait > 1   flush until all txn's are complete, wait
1508  */
1509 void jfs_flush_journal(struct jfs_log *log, int wait)
1510 {
1511     int i;
1512     struct tblock *target = NULL;
1513 
1514     /* jfs_write_inode may call us during read-only mount */
1515     if (!log)
1516         return;
1517 
1518     jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1519 
1520     LOGGC_LOCK(log);
1521 
1522     if (!list_empty(&log->cqueue)) {
1523         /*
1524          * This ensures that we will keep writing to the journal as long
1525          * as there are unwritten commit records
1526          */
1527         target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1528 
1529         if (test_bit(log_FLUSH, &log->flag)) {
1530             /*
1531              * We're already flushing.
1532              * if flush_tblk is NULL, we are flushing everything,
1533              * so leave it that way.  Otherwise, update it to the
1534              * latest transaction
1535              */
1536             if (log->flush_tblk)
1537                 log->flush_tblk = target;
1538         } else {
1539             /* Only flush until latest transaction is committed */
1540             log->flush_tblk = target;
1541             set_bit(log_FLUSH, &log->flag);
1542 
1543             /*
1544              * Initiate I/O on outstanding transactions
1545              */
1546             if (!(log->cflag & logGC_PAGEOUT)) {
1547                 log->cflag |= logGC_PAGEOUT;
1548                 lmGCwrite(log, 0);
1549             }
1550         }
1551     }
1552     if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1553         /* Flush until all activity complete */
1554         set_bit(log_FLUSH, &log->flag);
1555         log->flush_tblk = NULL;
1556     }
1557 
1558     if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1559         DECLARE_WAITQUEUE(__wait, current);
1560 
1561         add_wait_queue(&target->gcwait, &__wait);
1562         set_current_state(TASK_UNINTERRUPTIBLE);
1563         LOGGC_UNLOCK(log);
1564         schedule();
1565         LOGGC_LOCK(log);
1566         remove_wait_queue(&target->gcwait, &__wait);
1567     }
1568     LOGGC_UNLOCK(log);
1569 
1570     if (wait < 2)
1571         return;
1572 
1573     write_special_inodes(log, filemap_fdatawrite);
1574 
1575     /*
1576      * If there was recent activity, we may need to wait
1577      * for the lazycommit thread to catch up
1578      */
1579     if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1580         for (i = 0; i < 200; i++) { /* Too much? */
1581             msleep(250);
1582             write_special_inodes(log, filemap_fdatawrite);
1583             if (list_empty(&log->cqueue) &&
1584                 list_empty(&log->synclist))
1585                 break;
1586         }
1587     }
1588     assert(list_empty(&log->cqueue));
1589 
1590 #ifdef CONFIG_JFS_DEBUG
1591     if (!list_empty(&log->synclist)) {
1592         struct logsyncblk *lp;
1593 
1594         printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1595         list_for_each_entry(lp, &log->synclist, synclist) {
1596             if (lp->xflag & COMMIT_PAGE) {
1597                 struct metapage *mp = (struct metapage *)lp;
1598                 print_hex_dump(KERN_ERR, "metapage: ",
1599                            DUMP_PREFIX_ADDRESS, 16, 4,
1600                            mp, sizeof(struct metapage), 0);
1601                 print_hex_dump(KERN_ERR, "page: ",
1602                            DUMP_PREFIX_ADDRESS, 16,
1603                            sizeof(long), mp->page,
1604                            sizeof(struct page), 0);
1605             } else
1606                 print_hex_dump(KERN_ERR, "tblock:",
1607                            DUMP_PREFIX_ADDRESS, 16, 4,
1608                            lp, sizeof(struct tblock), 0);
1609         }
1610     }
1611 #else
1612     WARN_ON(!list_empty(&log->synclist));
1613 #endif
1614     clear_bit(log_FLUSH, &log->flag);
1615 }
1616 
1617 /*
1618  * NAME:    lmLogShutdown()
1619  *
1620  * FUNCTION:    log shutdown at last LogClose().
1621  *
1622  *      write log syncpt record.
1623  *      update super block to set redone flag to 0.
1624  *
1625  * PARAMETER:   log - log inode
1626  *
1627  * RETURN:  0   - success
1628  *
1629  * serialization: single last close thread
1630  */
1631 int lmLogShutdown(struct jfs_log * log)
1632 {
1633     int rc;
1634     struct lrd lrd;
1635     int lsn;
1636     struct logsuper *logsuper;
1637     struct lbuf *bpsuper;
1638     struct lbuf *bp;
1639     struct logpage *lp;
1640 
1641     jfs_info("lmLogShutdown: log:0x%p", log);
1642 
1643     jfs_flush_journal(log, 2);
1644 
1645     /*
1646      * write the last SYNCPT record with syncpoint = 0
1647      * (i.e., log redo up to HERE !)
1648      */
1649     lrd.logtid = 0;
1650     lrd.backchain = 0;
1651     lrd.type = cpu_to_le16(LOG_SYNCPT);
1652     lrd.length = 0;
1653     lrd.log.syncpt.sync = 0;
1654 
1655     lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1656     bp = log->bp;
1657     lp = (struct logpage *) bp->l_ldata;
1658     lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1659     lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1660     lbmIOWait(log->bp, lbmFREE);
1661     log->bp = NULL;
1662 
1663     /*
1664      * synchronous update log superblock
1665      * mark log state as shutdown cleanly
1666      * (i.e., Log does not need to be replayed).
1667      */
1668     if ((rc = lbmRead(log, 1, &bpsuper)))
1669         goto out;
1670 
1671     logsuper = (struct logsuper *) bpsuper->l_ldata;
1672     logsuper->state = cpu_to_le32(LOGREDONE);
1673     logsuper->end = cpu_to_le32(lsn);
1674     lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1675     rc = lbmIOWait(bpsuper, lbmFREE);
1676 
1677     jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1678          lsn, log->page, log->eor);
1679 
1680       out:
1681     /*
1682      * shutdown per log i/o
1683      */
1684     lbmLogShutdown(log);
1685 
1686     if (rc) {
1687         jfs_warn("lmLogShutdown: exit(%d)", rc);
1688     }
1689     return rc;
1690 }
1691 
1692 
1693 /*
1694  * NAME:    lmLogFileSystem()
1695  *
1696  * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1697  *  file system into/from log active file system list.
1698  *
1699  * PARAMETE:    log - pointer to logs inode.
1700  *      fsdev   - kdev_t of filesystem.
1701  *      serial  - pointer to returned log serial number
1702  *      activate - insert/remove device from active list.
1703  *
1704  * RETURN:  0   - success
1705  *      errors returned by vms_iowait().
1706  */
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
               int activate)
{
    int rc = 0;
    int i;
    struct logsuper *logsuper;
    struct lbuf *bpsuper;
    uuid_t *uuid = &sbi->uuid;

    /*
     * insert/remove file system device to log active file system list.
     */
    if ((rc = lbmRead(log, 1, &bpsuper)))
        return rc;

    logsuper = (struct logsuper *) bpsuper->l_ldata;
    if (activate) {
        /* claim the first free (null-uuid) slot in the active list */
        for (i = 0; i < MAX_ACTIVE; i++)
            if (uuid_is_null(&logsuper->active[i].uuid)) {
                uuid_copy(&logsuper->active[i].uuid, uuid);
                sbi->aggregate = i;
                break;
            }
        if (i == MAX_ACTIVE) {
            jfs_warn("Too many file systems sharing journal!");
            lbmFree(bpsuper);
            return -EMFILE; /* Is there a better rc? */
        }
    } else {
        /* find this file system's slot and clear it */
        for (i = 0; i < MAX_ACTIVE; i++)
            if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
                uuid_copy(&logsuper->active[i].uuid,
                      &uuid_null);
                break;
            }
        if (i == MAX_ACTIVE) {
            jfs_warn("Somebody stomped on the journal!");
            lbmFree(bpsuper);
            return -EIO;
        }

    }

    /*
     * synchronous write log superblock:
     *
     * write sidestream bypassing write queue:
     * at file system mount, log super block is updated for
     * activation of the file system before any log record
     * (MOUNT record) of the file system, and at file system
     * unmount, all meta data for the file system has been
     * flushed before log super block is updated for deactivation
     * of the file system.
     */
    lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
    rc = lbmIOWait(bpsuper, lbmFREE);

    return rc;
}
1766 
1767 /*
1768  *      log buffer manager (lbm)
1769  *      ------------------------
1770  *
1771  * special purpose buffer manager supporting log i/o requirements.
1772  *
1773  * per log write queue:
1774  * log pageout occurs in serial order by fifo write queue and
1775  * restricting to a single i/o in pregress at any one time.
1776  * a circular singly-linked list
1777  * (log->wrqueue points to the tail, and buffers are linked via
1778  * bp->wrqueue field), and
1779  * maintains log page in pageout ot waiting for pageout in serial pageout.
1780  */
1781 
1782 /*
1783  *  lbmLogInit()
1784  *
1785  * initialize per log I/O setup at lmLogInit()
1786  */
1787 static int lbmLogInit(struct jfs_log * log)
1788 {               /* log inode */
1789     int i;
1790     struct lbuf *lbuf;
1791 
1792     jfs_info("lbmLogInit: log:0x%p", log);
1793 
1794     /* initialize current buffer cursor */
1795     log->bp = NULL;
1796 
1797     /* initialize log device write queue */
1798     log->wqueue = NULL;
1799 
1800     /*
1801      * Each log has its own buffer pages allocated to it.  These are
1802      * not managed by the page cache.  This ensures that a transaction
1803      * writing to the log does not block trying to allocate a page from
1804      * the page cache (for the log).  This would be bad, since page
1805      * allocation waits on the kswapd thread that may be committing inodes
1806      * which would cause log activity.  Was that clear?  I'm trying to
1807      * avoid deadlock here.
1808      */
1809     init_waitqueue_head(&log->free_wait);
1810 
1811     log->lbuf_free = NULL;
1812 
1813     for (i = 0; i < LOGPAGES;) {
1814         char *buffer;
1815         uint offset;
1816         struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1817 
1818         if (!page)
1819             goto error;
1820         buffer = page_address(page);
1821         for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1822             lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1823             if (lbuf == NULL) {
1824                 if (offset == 0)
1825                     __free_page(page);
1826                 goto error;
1827             }
1828             if (offset) /* we already have one reference */
1829                 get_page(page);
1830             lbuf->l_offset = offset;
1831             lbuf->l_ldata = buffer + offset;
1832             lbuf->l_page = page;
1833             lbuf->l_log = log;
1834             init_waitqueue_head(&lbuf->l_ioevent);
1835 
1836             lbuf->l_freelist = log->lbuf_free;
1837             log->lbuf_free = lbuf;
1838             i++;
1839         }
1840     }
1841 
1842     return (0);
1843 
1844       error:
1845     lbmLogShutdown(log);
1846     return -ENOMEM;
1847 }
1848 
1849 
1850 /*
1851  *  lbmLogShutdown()
1852  *
1853  * finalize per log I/O setup at lmLogShutdown()
1854  */
1855 static void lbmLogShutdown(struct jfs_log * log)
1856 {
1857     struct lbuf *lbuf;
1858 
1859     jfs_info("lbmLogShutdown: log:0x%p", log);
1860 
1861     lbuf = log->lbuf_free;
1862     while (lbuf) {
1863         struct lbuf *next = lbuf->l_freelist;
1864         __free_page(lbuf->l_page);
1865         kfree(lbuf);
1866         lbuf = next;
1867     }
1868 }
1869 
1870 
1871 /*
1872  *  lbmAllocate()
1873  *
1874  * allocate an empty log buffer
1875  */
1876 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1877 {
1878     struct lbuf *bp;
1879     unsigned long flags;
1880 
1881     /*
1882      * recycle from log buffer freelist if any
1883      */
1884     LCACHE_LOCK(flags);
1885     LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1886     log->lbuf_free = bp->l_freelist;
1887     LCACHE_UNLOCK(flags);
1888 
1889     bp->l_flag = 0;
1890 
1891     bp->l_wqnext = NULL;
1892     bp->l_freelist = NULL;
1893 
1894     bp->l_pn = pn;
1895     bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1896     bp->l_ceor = 0;
1897 
1898     return bp;
1899 }
1900 
1901 
1902 /*
1903  *  lbmFree()
1904  *
1905  * release a log buffer to freelist
1906  */
1907 static void lbmFree(struct lbuf * bp)
1908 {
1909     unsigned long flags;
1910 
1911     LCACHE_LOCK(flags);
1912 
1913     lbmfree(bp);
1914 
1915     LCACHE_UNLOCK(flags);
1916 }
1917 
1918 static void lbmfree(struct lbuf * bp)
1919 {
1920     struct jfs_log *log = bp->l_log;
1921 
1922     assert(bp->l_wqnext == NULL);
1923 
1924     /*
1925      * return the buffer to head of freelist
1926      */
1927     bp->l_freelist = log->lbuf_free;
1928     log->lbuf_free = bp;
1929 
1930     wake_up(&log->free_wait);
1931     return;
1932 }
1933 
1934 
1935 /*
1936  * NAME:    lbmRedrive
1937  *
1938  * FUNCTION:    add a log buffer to the log redrive list
1939  *
1940  * PARAMETER:
1941  *  bp  - log buffer
1942  *
1943  * NOTES:
1944  *  Takes log_redrive_lock.
1945  */
1946 static inline void lbmRedrive(struct lbuf *bp)
1947 {
1948     unsigned long flags;
1949 
1950     spin_lock_irqsave(&log_redrive_lock, flags);
1951     bp->l_redrive_next = log_redrive_list;
1952     log_redrive_list = bp;
1953     spin_unlock_irqrestore(&log_redrive_lock, flags);
1954 
1955     wake_up_process(jfsIOthread);
1956 }
1957 
1958 
1959 /*
1960  *  lbmRead()
1961  */
1962 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1963 {
1964     struct bio *bio;
1965     struct lbuf *bp;
1966 
1967     /*
1968      * allocate a log buffer
1969      */
1970     *bpp = bp = lbmAllocate(log, pn);
1971     jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1972 
1973     bp->l_flag |= lbmREAD;
1974 
1975     bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
1976     bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1977     bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1978     BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1979 
1980     bio->bi_end_io = lbmIODone;
1981     bio->bi_private = bp;
1982     /*check if journaling to disk has been disabled*/
1983     if (log->no_integrity) {
1984         bio->bi_iter.bi_size = 0;
1985         lbmIODone(bio);
1986     } else {
1987         submit_bio(bio);
1988     }
1989 
1990     wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
1991 
1992     return 0;
1993 }
1994 
1995 
1996 /*
1997  *  lbmWrite()
1998  *
1999  * buffer at head of pageout queue stays after completion of
2000  * partial-page pageout and redriven by explicit initiation of
2001  * pageout by caller until full-page pageout is completed and
2002  * released.
2003  *
2004  * device driver i/o done redrives pageout of new buffer at
2005  * head of pageout queue when current buffer at head of pageout
2006  * queue is released at the completion of its full-page pageout.
2007  *
2008  * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2009  * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2010  */
2011 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2012              int cant_block)
2013 {
2014     struct lbuf *tail;
2015     unsigned long flags;
2016 
2017     jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2018 
2019     /* map the logical block address to physical block address */
2020     bp->l_blkno =
2021         log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2022 
2023     LCACHE_LOCK(flags);     /* disable+lock */
2024 
2025     /*
2026      * initialize buffer for device driver
2027      */
2028     bp->l_flag = flag;
2029 
2030     /*
2031      *  insert bp at tail of write queue associated with log
2032      *
2033      * (request is either for bp already/currently at head of queue
2034      * or new bp to be inserted at tail)
2035      */
2036     tail = log->wqueue;
2037 
2038     /* is buffer not already on write queue ? */
2039     if (bp->l_wqnext == NULL) {
2040         /* insert at tail of wqueue */
2041         if (tail == NULL) {
2042             log->wqueue = bp;
2043             bp->l_wqnext = bp;
2044         } else {
2045             log->wqueue = bp;
2046             bp->l_wqnext = tail->l_wqnext;
2047             tail->l_wqnext = bp;
2048         }
2049 
2050         tail = bp;
2051     }
2052 
2053     /* is buffer at head of wqueue and for write ? */
2054     if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2055         LCACHE_UNLOCK(flags);   /* unlock+enable */
2056         return;
2057     }
2058 
2059     LCACHE_UNLOCK(flags);   /* unlock+enable */
2060 
2061     if (cant_block)
2062         lbmRedrive(bp);
2063     else if (flag & lbmSYNC)
2064         lbmStartIO(bp);
2065     else {
2066         LOGGC_UNLOCK(log);
2067         lbmStartIO(bp);
2068         LOGGC_LOCK(log);
2069     }
2070 }
2071 
2072 
2073 /*
2074  *  lbmDirectWrite()
2075  *
2076  * initiate pageout bypassing write queue for sidestream
2077  * (e.g., log superblock) write;
2078  */
2079 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2080 {
2081     jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2082          bp, flag, bp->l_pn);
2083 
2084     /*
2085      * initialize buffer for device driver
2086      */
2087     bp->l_flag = flag | lbmDIRECT;
2088 
2089     /* map the logical block address to physical block address */
2090     bp->l_blkno =
2091         log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2092 
2093     /*
2094      *  initiate pageout of the page
2095      */
2096     lbmStartIO(bp);
2097 }
2098 
2099 
2100 /*
2101  * NAME:    lbmStartIO()
2102  *
2103  * FUNCTION:    Interface to DD strategy routine
2104  *
2105  * RETURN:  none
2106  *
2107  * serialization: LCACHE_LOCK() is NOT held during log i/o;
2108  */
2109 static void lbmStartIO(struct lbuf * bp)
2110 {
2111     struct bio *bio;
2112     struct jfs_log *log = bp->l_log;
2113 
2114     jfs_info("lbmStartIO");
2115 
2116     bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
2117     bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2118     bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2119     BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2120 
2121     bio->bi_end_io = lbmIODone;
2122     bio->bi_private = bp;
2123 
2124     /* check if journaling to disk has been disabled */
2125     if (log->no_integrity) {
2126         bio->bi_iter.bi_size = 0;
2127         lbmIODone(bio);
2128     } else {
2129         submit_bio(bio);
2130         INCREMENT(lmStat.submitted);
2131     }
2132 }
2133 
2134 
2135 /*
2136  *  lbmIOWait()
2137  */
2138 static int lbmIOWait(struct lbuf * bp, int flag)
2139 {
2140     unsigned long flags;
2141     int rc = 0;
2142 
2143     jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2144 
2145     LCACHE_LOCK(flags);     /* disable+lock */
2146 
2147     LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2148 
2149     rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2150 
2151     if (flag & lbmFREE)
2152         lbmfree(bp);
2153 
2154     LCACHE_UNLOCK(flags);   /* unlock+enable */
2155 
2156     jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2157     return rc;
2158 }
2159 
2160 /*
2161  *  lbmIODone()
2162  *
2163  * executed at INTIODONE level
2164  */
2165 static void lbmIODone(struct bio *bio)
2166 {
2167     struct lbuf *bp = bio->bi_private;
2168     struct lbuf *nextbp, *tail;
2169     struct jfs_log *log;
2170     unsigned long flags;
2171 
2172     /*
2173      * get back jfs buffer bound to the i/o buffer
2174      */
2175     jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2176 
2177     LCACHE_LOCK(flags);     /* disable+lock */
2178 
2179     bp->l_flag |= lbmDONE;
2180 
2181     if (bio->bi_status) {
2182         bp->l_flag |= lbmERROR;
2183 
2184         jfs_err("lbmIODone: I/O error in JFS log");
2185     }
2186 
2187     bio_put(bio);
2188 
2189     /*
2190      *  pagein completion
2191      */
2192     if (bp->l_flag & lbmREAD) {
2193         bp->l_flag &= ~lbmREAD;
2194 
2195         LCACHE_UNLOCK(flags);   /* unlock+enable */
2196 
2197         /* wakeup I/O initiator */
2198         LCACHE_WAKEUP(&bp->l_ioevent);
2199 
2200         return;
2201     }
2202 
2203     /*
2204      *  pageout completion
2205      *
2206      * the bp at the head of write queue has completed pageout.
2207      *
2208      * if single-commit/full-page pageout, remove the current buffer
2209      * from head of pageout queue, and redrive pageout with
2210      * the new buffer at head of pageout queue;
2211      * otherwise, the partial-page pageout buffer stays at
2212      * the head of pageout queue to be redriven for pageout
2213      * by lmGroupCommit() until full-page pageout is completed.
2214      */
2215     bp->l_flag &= ~lbmWRITE;
2216     INCREMENT(lmStat.pagedone);
2217 
2218     /* update committed lsn */
2219     log = bp->l_log;
2220     log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2221 
2222     if (bp->l_flag & lbmDIRECT) {
2223         LCACHE_WAKEUP(&bp->l_ioevent);
2224         LCACHE_UNLOCK(flags);
2225         return;
2226     }
2227 
2228     tail = log->wqueue;
2229 
2230     /* single element queue */
2231     if (bp == tail) {
2232         /* remove head buffer of full-page pageout
2233          * from log device write queue
2234          */
2235         if (bp->l_flag & lbmRELEASE) {
2236             log->wqueue = NULL;
2237             bp->l_wqnext = NULL;
2238         }
2239     }
2240     /* multi element queue */
2241     else {
2242         /* remove head buffer of full-page pageout
2243          * from log device write queue
2244          */
2245         if (bp->l_flag & lbmRELEASE) {
2246             nextbp = tail->l_wqnext = bp->l_wqnext;
2247             bp->l_wqnext = NULL;
2248 
2249             /*
2250              * redrive pageout of next page at head of write queue:
2251              * redrive next page without any bound tblk
2252              * (i.e., page w/o any COMMIT records), or
2253              * first page of new group commit which has been
2254              * queued after current page (subsequent pageout
2255              * is performed synchronously, except page without
2256              * any COMMITs) by lmGroupCommit() as indicated
2257              * by lbmWRITE flag;
2258              */
2259             if (nextbp->l_flag & lbmWRITE) {
2260                 /*
2261                  * We can't do the I/O at interrupt time.
2262                  * The jfsIO thread can do it
2263                  */
2264                 lbmRedrive(nextbp);
2265             }
2266         }
2267     }
2268 
2269     /*
2270      *  synchronous pageout:
2271      *
2272      * buffer has not necessarily been removed from write queue
2273      * (e.g., synchronous write of partial-page with COMMIT):
2274      * leave buffer for i/o initiator to dispose
2275      */
2276     if (bp->l_flag & lbmSYNC) {
2277         LCACHE_UNLOCK(flags);   /* unlock+enable */
2278 
2279         /* wakeup I/O initiator */
2280         LCACHE_WAKEUP(&bp->l_ioevent);
2281     }
2282 
2283     /*
2284      *  Group Commit pageout:
2285      */
2286     else if (bp->l_flag & lbmGC) {
2287         LCACHE_UNLOCK(flags);
2288         lmPostGC(bp);
2289     }
2290 
2291     /*
2292      *  asynchronous pageout:
2293      *
2294      * buffer must have been removed from write queue:
2295      * insert buffer at head of freelist where it can be recycled
2296      */
2297     else {
2298         assert(bp->l_flag & lbmRELEASE);
2299         assert(bp->l_flag & lbmFREE);
2300         lbmfree(bp);
2301 
2302         LCACHE_UNLOCK(flags);   /* unlock+enable */
2303     }
2304 }
2305 
/*
 * jfsIOWait(): main loop of the jfsIO kernel thread.
 *
 * Drains log_redrive_list, submitting each queued log buffer via
 * lbmStartIO(), then sleeps until lbmRedrive() wakes it or the
 * thread is asked to stop.  Returns 0 on thread exit.
 */
int jfsIOWait(void *arg)
{
    struct lbuf *bp;

    do {
        spin_lock_irq(&log_redrive_lock);
        while ((bp = log_redrive_list)) {
            /* pop one buffer, then drop the lock around the
             * (potentially blocking) I/O submission
             */
            log_redrive_list = bp->l_redrive_next;
            bp->l_redrive_next = NULL;
            spin_unlock_irq(&log_redrive_lock);
            lbmStartIO(bp);
            spin_lock_irq(&log_redrive_lock);
        }

        if (freezing(current)) {
            spin_unlock_irq(&log_redrive_lock);
            try_to_freeze();
        } else {
            /* mark ourselves sleeping BEFORE releasing the lock so
             * a wake_up_process() racing with the empty-list check
             * cannot be lost
             */
            set_current_state(TASK_INTERRUPTIBLE);
            spin_unlock_irq(&log_redrive_lock);
            schedule();
        }
    } while (!kthread_should_stop());

    jfs_info("jfsIOWait being killed!");
    return 0;
}
2333 
2334 /*
2335  * NAME:    lmLogFormat()/jfs_logform()
2336  *
2337  * FUNCTION:    format file system log
2338  *
2339  * PARAMETERS:
2340  *  log - volume log
2341  *  logAddress - start address of log space in FS block
2342  *  logSize - length of log space in FS block;
2343  *
2344  * RETURN:  0   - success
2345  *      -EIO    - i/o error
2346  *
2347  * XXX: We're synchronously writing one page at a time.  This needs to
2348  *  be improved by writing multiple pages at once.
2349  */
2350 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2351 {
2352     int rc = -EIO;
2353     struct jfs_sb_info *sbi;
2354     struct logsuper *logsuper;
2355     struct logpage *lp;
2356     int lspn;       /* log sequence page number */
2357     struct lrd *lrd_ptr;
2358     int npages = 0;
2359     struct lbuf *bp;
2360 
2361     jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2362          (long long)logAddress, logSize);
2363 
2364     sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2365 
2366     /* allocate a log buffer */
2367     bp = lbmAllocate(log, 1);
2368 
2369     npages = logSize >> sbi->l2nbperpage;
2370 
2371     /*
2372      *  log space:
2373      *
2374      * page 0 - reserved;
2375      * page 1 - log superblock;
2376      * page 2 - log data page: A SYNC log record is written
2377      *      into this page at logform time;
2378      * pages 3-N - log data page: set to empty log data pages;
2379      */
2380     /*
2381      *  init log superblock: log page 1
2382      */
2383     logsuper = (struct logsuper *) bp->l_ldata;
2384 
2385     logsuper->magic = cpu_to_le32(LOGMAGIC);
2386     logsuper->version = cpu_to_le32(LOGVERSION);
2387     logsuper->state = cpu_to_le32(LOGREDONE);
2388     logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
2389     logsuper->size = cpu_to_le32(npages);
2390     logsuper->bsize = cpu_to_le32(sbi->bsize);
2391     logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2392     logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2393 
2394     bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2395     bp->l_blkno = logAddress + sbi->nbperpage;
2396     lbmStartIO(bp);
2397     if ((rc = lbmIOWait(bp, 0)))
2398         goto exit;
2399 
2400     /*
2401      *  init pages 2 to npages-1 as log data pages:
2402      *
2403      * log page sequence number (lpsn) initialization:
2404      *
2405      * pn:   0     1     2     3                 n-1
2406      *       +-----+-----+=====+=====+===.....===+=====+
2407      * lspn:             N-1   0     1           N-2
2408      *                   <--- N page circular file ---->
2409      *
2410      * the N (= npages-2) data pages of the log is maintained as
2411      * a circular file for the log records;
2412      * lpsn grows by 1 monotonically as each log page is written
2413      * to the circular file of the log;
2414      * and setLogpage() will not reset the page number even if
2415      * the eor is equal to LOGPHDRSIZE. In order for binary search
2416      * still work in find log end process, we have to simulate the
2417      * log wrap situation at the log format time.
2418      * The 1st log page written will have the highest lpsn. Then
2419      * the succeeding log pages will have ascending order of
2420      * the lspn starting from 0, ... (N-2)
2421      */
2422     lp = (struct logpage *) bp->l_ldata;
2423     /*
2424      * initialize 1st log page to be written: lpsn = N - 1,
2425      * write a SYNCPT log record is written to this page
2426      */
2427     lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2428     lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2429 
2430     lrd_ptr = (struct lrd *) &lp->data;
2431     lrd_ptr->logtid = 0;
2432     lrd_ptr->backchain = 0;
2433     lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2434     lrd_ptr->length = 0;
2435     lrd_ptr->log.syncpt.sync = 0;
2436 
2437     bp->l_blkno += sbi->nbperpage;
2438     bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2439     lbmStartIO(bp);
2440     if ((rc = lbmIOWait(bp, 0)))
2441         goto exit;
2442 
2443     /*
2444      *  initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2445      */
2446     for (lspn = 0; lspn < npages - 3; lspn++) {
2447         lp->h.page = lp->t.page = cpu_to_le32(lspn);
2448         lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2449 
2450         bp->l_blkno += sbi->nbperpage;
2451         bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2452         lbmStartIO(bp);
2453         if ((rc = lbmIOWait(bp, 0)))
2454             goto exit;
2455     }
2456 
2457     rc = 0;
2458 exit:
2459     /*
2460      *  finalize log
2461      */
2462     /* release the buffer */
2463     lbmFree(bp);
2464 
2465     return rc;
2466 }
2467 
#ifdef CONFIG_JFS_STATISTICS
/*
 * /proc show routine: dump the global log manager counters,
 * one "name = value" line per counter after a fixed header.
 */
int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
    seq_printf(m,
               "JFS Logmgr stats\n"
               "================\n");
    seq_printf(m, "commits = %d\n", lmStat.commit);
    seq_printf(m, "writes submitted = %d\n", lmStat.submitted);
    seq_printf(m, "writes completed = %d\n", lmStat.pagedone);
    seq_printf(m, "full pages submitted = %d\n", lmStat.full_page);
    seq_printf(m, "partial pages submitted = %d\n", lmStat.partial_page);
    return 0;
}
#endif /* CONFIG_JFS_STATISTICS */