Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  *   Copyright (C) International Business Machines Corp., 2000-2004
0004  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
0005  */
0006 #ifndef _H_JFS_LOGMGR
0007 #define _H_JFS_LOGMGR
0008 
0009 #include <linux/uuid.h>
0010 
0011 #include "jfs_filsys.h"
0012 #include "jfs_lock.h"
0013 
0014 /*
0015  *  log manager configuration parameters
0016  */
0017 
/*
 * log page size
 *
 * LOGPSIZE is derived from its log2 so the two constants cannot
 * drift apart if one of them is ever changed.
 */
#define L2LOGPSIZE	12			/* log2 of the log page size */
#define LOGPSIZE	(1 << L2LOGPSIZE)	/* log page size in bytes: 4096 */

#define LOGPAGES	16	/* Log pages per mounted file system */
0023 
0024 /*
0025  *  log logical volume
0026  *
0027  * a log is used to make the commit operation on journalled
0028  * files within the same logical volume group atomic.
0029  * a log is implemented with a logical volume.
0030  * there is one log per logical volume group.
0031  *
0032  * block 0 of the log logical volume is not used (ipl etc).
0033  * block 1 contains a log "superblock" and is used by logFormat(),
0034  * lmLogInit(), lmLogShutdown(), and logRedo() to record status
0035  * of the log but is not otherwise used during normal processing.
0036  * blocks 2 - (N-1) are used to contain log records.
0037  *
0038  * when a volume group is varied-on-line, logRedo() must have
0039  * been executed before the file systems (logical volumes) in
0040  * the volume group can be mounted.
0041  */
0042 /*
0043  *  log superblock (block 1 of logical volume)
0044  */
/* block numbers within the log logical volume */
#define LOGSUPER_B	1	/* block holding the log superblock */
#define LOGSTART_B	2	/* first block holding log records */

/* log identification */
#define LOGMAGIC	0x87654321	/* log magic number */
#define LOGVERSION	1		/* log version number */

/* Max active file systems sharing log */
#define MAX_ACTIVE	128
0052 
0053 struct logsuper {
0054     __le32 magic;       /* 4: log lv identifier */
0055     __le32 version;     /* 4: version number */
0056     __le32 serial;      /* 4: log open/mount counter */
0057     __le32 size;        /* 4: size in number of LOGPSIZE blocks */
0058     __le32 bsize;       /* 4: logical block size in byte */
0059     __le32 l2bsize;     /* 4: log2 of bsize */
0060 
0061     __le32 flag;        /* 4: option */
0062     __le32 state;       /* 4: state - see below */
0063 
0064     __le32 end;     /* 4: addr of last log record set by logredo */
0065     uuid_t uuid;        /* 16: 128-bit journal uuid */
0066     char label[16];     /* 16: journal label */
0067     struct {
0068         uuid_t uuid;
0069     } active[MAX_ACTIVE];   /* 2048: active file systems list */
0070 };
0071 
0072 /* log flag: commit option (see jfs_filsys.h) */
0073 
/*
 * log state
 */

/* log mounted by lmLogInit() */
#define LOGMOUNT	0

/*
 * log shutdown by lmLogShutdown().
 * log redo completed by logredo().
 */
#define LOGREDONE	1

/* log wrapped */
#define LOGWRAP		2

/* log read error detected in logredo() */
#define LOGREADERR	3
0081 
0082 
0083 /*
0084  *  log logical page
0085  *
0086  * (this comment should be rewritten !)
0087  * the header and trailer structures (h,t) will normally have
0088  * the same page and eor value.
0089  * An exception to this occurs when a complete page write is not
0090  * accomplished on a power failure. Since the hardware may "split write"
0091  * sectors in the page, any out of order sequence may occur during powerfail
0092  * and needs to be recognized during log replay.  The xor value is
0093  * an "exclusive or" of all log words in the page up to eor.  This
0094  * 32 bit xor is stored with the top 16 bits in the header and the
0095  * bottom 16 bits in the trailer.  logredo can easily recognize pages
0096  * that were not completed by reconstructing this xor and checking
0097  * the log page.
0098  *
0099  * Previous versions of the operating system did not allow split
0100  * writes and detected partially written records in logredo by
0101  * ordering the updates to the header, trailer, and the move of data
0102  * into the logdata area.  The order: (1) data is moved (2) header
0103  * is updated (3) trailer is updated.  In logredo, when the header
0104  * differed from the trailer, the header and trailer were reconciled
0105  * as follows: if h.page != t.page they were set to the smaller of
0106  * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
0107  * h.eor != t.eor they were set to the smaller of their two values.
0108  */
0109 struct logpage {
0110     struct {        /* header */
0111         __le32 page;    /* 4: log sequence page number */
0112         __le16 rsrvd;   /* 2: */
0113         __le16 eor; /* 2: end-of-log offset of lasrt record write */
0114     } h;
0115 
0116     __le32 data[LOGPSIZE / 4 - 4];  /* log record area */
0117 
0118     struct {        /* trailer */
0119         __le32 page;    /* 4: normally the same as h.page */
0120         __le16 rsrvd;   /* 2: */
0121         __le16 eor; /* 2: normally the same as h.eor */
0122     } t;
0123 };
0124 
/* sizes in bytes of the log page header and trailer */
#define LOGPHDRSIZE	8
#define LOGPTLRSIZE	8
0127 
0128 
0129 /*
0130  *  log record
0131  *
0132  * (this comment should be rewritten !)
0133  * jfs uses only "after" log records (only a single writer is allowed
0134  * in a page, pages are written to temporary paging space if
0135  * they must be written to disk before commit, and i/o is
0136  * scheduled for modified pages to their home location after
0137  * the log records containing the after values and the commit
0138  * record is written to the log on disk, undo discards the copy
0139  * in main-memory.)
0140  *
0141  * a log record consists of a data area of variable length followed by
0142  * a descriptor of fixed size LOGRDSIZE bytes.
0143  * the data area is rounded up to an integral number of 4-bytes and
0144  * must be no longer than LOGPSIZE.
0145  * the descriptor's size is a multiple of 4 bytes and it is aligned on a
0146  * 4-byte boundary.
0147  * records are packed one after the other in the data area of log pages.
0148  * (sometimes a DUMMY record is inserted so that at least one record ends
0149  * on every page or the longest record is placed on at most two pages).
0150  * the field eor in page header/trailer points to the byte following
0151  * the last record on a page.
0152  */
0153 
/*
 * log record types
 */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/*
 * REDOPAGE/NOREDOPAGE log record data type
 */
#define LOG_INODE		0x0001
#define LOG_XTREE		0x0002
#define LOG_DTREE		0x0004
#define LOG_BTROOT		0x0010
#define LOG_EA			0x0020
#define LOG_ACL			0x0040
#define LOG_DATA		0x0080
#define LOG_NEW			0x0100
#define LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/*
 * UPDATEMAP log record descriptor type
 */
#define LOG_ALLOCXADLIST	0x0080
#define LOG_ALLOCPXDLIST	0x0040
#define LOG_ALLOCXAD		0x0020
#define LOG_ALLOCPXD		0x0010
#define LOG_FREEXADLIST		0x0008
#define LOG_FREEPXDLIST		0x0004
#define LOG_FREEXAD		0x0002
#define LOG_FREEPXD		0x0001
0186 
0187 
0188 struct lrd {
0189     /*
0190      * type independent area
0191      */
0192     __le32 logtid;      /* 4: log transaction identifier */
0193     __le32 backchain;   /* 4: ptr to prev record of same transaction */
0194     __le16 type;        /* 2: record type */
0195     __le16 length;      /* 2: length of data in record (in byte) */
0196     __le32 aggregate;   /* 4: file system lv/aggregate */
0197     /* (16) */
0198 
0199     /*
0200      * type dependent area (20)
0201      */
0202     union {
0203 
0204         /*
0205          *  COMMIT: commit
0206          *
0207          * transaction commit: no type-dependent information;
0208          */
0209 
0210         /*
0211          *  REDOPAGE: after-image
0212          *
0213          * apply after-image;
0214          *
0215          * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
0216          */
0217         struct {
0218             __le32 fileset; /* 4: fileset number */
0219             __le32 inode;   /* 4: inode number */
0220             __le16 type;    /* 2: REDOPAGE record type */
0221             __le16 l2linesize;  /* 2: log2 of line size */
0222             pxd_t pxd;  /* 8: on-disk page pxd */
0223         } redopage; /* (20) */
0224 
0225         /*
0226          *  NOREDOPAGE: the page is freed
0227          *
0228          * do not apply after-image records which precede this record
0229          * in the log with the same page block number to this page.
0230          *
0231          * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
0232          */
0233         struct {
0234             __le32 fileset; /* 4: fileset number */
0235             __le32 inode;   /* 4: inode number */
0236             __le16 type;    /* 2: NOREDOPAGE record type */
0237             __le16 rsrvd;   /* 2: reserved */
0238             pxd_t pxd;  /* 8: on-disk page pxd */
0239         } noredopage;   /* (20) */
0240 
0241         /*
0242          *  UPDATEMAP: update block allocation map
0243          *
0244          * either in-line PXD,
0245          * or     out-of-line  XADLIST;
0246          *
0247          * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
0248          */
0249         struct {
0250             __le32 fileset; /* 4: fileset number */
0251             __le32 inode;   /* 4: inode number */
0252             __le16 type;    /* 2: UPDATEMAP record type */
0253             __le16 nxd; /* 2: number of extents */
0254             pxd_t pxd;  /* 8: pxd */
0255         } updatemap;    /* (20) */
0256 
0257         /*
0258          *  NOREDOINOEXT: the inode extent is freed
0259          *
0260          * do not apply after-image records which precede this
0261          * record in the log with the any of the 4 page block
0262          * numbers in this inode extent.
0263          *
0264          * NOTE: The fileset and pxd fields MUST remain in
0265          *       the same fields in the REDOPAGE record format.
0266          *
0267          */
0268         struct {
0269             __le32 fileset; /* 4: fileset number */
0270             __le32 iagnum;  /* 4: IAG number     */
0271             __le32 inoext_idx;  /* 4: inode extent index */
0272             pxd_t pxd;  /* 8: on-disk page pxd */
0273         } noredoinoext; /* (20) */
0274 
0275         /*
0276          *  SYNCPT: log sync point
0277          *
0278          * replay log up to syncpt address specified;
0279          */
0280         struct {
0281             __le32 sync;    /* 4: syncpt address (0 = here) */
0282         } syncpt;
0283 
0284         /*
0285          *  MOUNT: file system mount
0286          *
0287          * file system mount: no type-dependent information;
0288          */
0289 
0290         /*
0291          *  ? FREEXTENT: free specified extent(s)
0292          *
0293          * free specified extent(s) from block allocation map
0294          * N.B.: nextents should be length of data/sizeof(xad_t)
0295          */
0296         struct {
0297             __le32 type;    /* 4: FREEXTENT record type */
0298             __le32 nextent; /* 4: number of extents */
0299 
0300             /* data: PXD or XAD list */
0301         } freextent;
0302 
0303         /*
0304          *  ? NOREDOFILE: this file is freed
0305          *
0306          * do not apply records which precede this record in the log
0307          * with the same inode number.
0308          *
0309          * NOREDOFILE must be the first to be written at commit
0310          * (last to be read in logredo()) - it prevents
0311          * replay of preceding updates of all preceding generations
0312          * of the inumber esp. the on-disk inode itself.
0313          */
0314         struct {
0315             __le32 fileset; /* 4: fileset number */
0316             __le32 inode;   /* 4: inode number */
0317         } noredofile;
0318 
0319         /*
0320          *  ? NEWPAGE:
0321          *
0322          * metadata type dependent
0323          */
0324         struct {
0325             __le32 fileset; /* 4: fileset number */
0326             __le32 inode;   /* 4: inode number */
0327             __le32 type;    /* 4: NEWPAGE record type */
0328             pxd_t pxd;  /* 8: on-disk page pxd */
0329         } newpage;
0330 
0331         /*
0332          *  ? DUMMY: filler
0333          *
0334          * no type-dependent information
0335          */
0336     } log;
0337 };                  /* (36) */
0338 
0339 #define LOGRDSIZE   (sizeof(struct lrd))
0340 
0341 /*
0342  *  line vector descriptor
0343  */
0344 struct lvd {
0345     __le16 offset;
0346     __le16 length;
0347 };
0348 
0349 
0350 /*
0351  *  log logical volume
0352  */
0353 struct jfs_log {
0354 
0355     struct list_head sb_list;/*  This is used to sync metadata
0356                  *    before writing syncpt.
0357                  */
0358     struct list_head journal_list; /* Global list */
0359     struct block_device *bdev; /* 4: log lv pointer */
0360     int serial;     /* 4: log mount serial number */
0361 
0362     s64 base;       /* @8: log extent address (inline log ) */
0363     int size;       /* 4: log size in log page (in page) */
0364     int l2bsize;        /* 4: log2 of bsize */
0365 
0366     unsigned long flag; /* 4: flag */
0367 
0368     struct lbuf *lbuf_free; /* 4: free lbufs */
0369     wait_queue_head_t free_wait;    /* 4: */
0370 
0371     /* log write */
0372     int logtid;     /* 4: log tid */
0373     int page;       /* 4: page number of eol page */
0374     int eor;        /* 4: eor of last record in eol page */
0375     struct lbuf *bp;    /* 4: current log page buffer */
0376 
0377     struct mutex loglock;   /* 4: log write serialization lock */
0378 
0379     /* syncpt */
0380     int nextsync;       /* 4: bytes to write before next syncpt */
0381     int active;     /* 4: */
0382     wait_queue_head_t syncwait; /* 4: */
0383 
0384     /* commit */
0385     uint cflag;     /* 4: */
0386     struct list_head cqueue; /* FIFO commit queue */
0387     struct tblock *flush_tblk; /* tblk we're waiting on for flush */
0388     int gcrtc;      /* 4: GC_READY transaction count */
0389     struct tblock *gclrt;   /* 4: latest GC_READY transaction */
0390     spinlock_t gclock;  /* 4: group commit lock */
0391     int logsize;        /* 4: log data area size in byte */
0392     int lsn;        /* 4: end-of-log */
0393     int clsn;       /* 4: clsn */
0394     int syncpt;     /* 4: addr of last syncpt record */
0395     int sync;       /* 4: addr from last logsync() */
0396     struct list_head synclist;  /* 8: logsynclist anchor */
0397     spinlock_t synclock;    /* 4: synclist lock */
0398     struct lbuf *wqueue;    /* 4: log pageout queue */
0399     int count;      /* 4: count */
0400     uuid_t uuid;        /* 16: 128-bit uuid of log device */
0401 
0402     int no_integrity;   /* 3: flag to disable journaling to disk */
0403 };
0404 
/*
 *	Log flag
 */
#define log_INLINELOG		1
#define log_SYNCBARRIER		2
#define log_QUIESCE		3
#define log_FLUSH		4
0412 
/*
 *	group commit flag
 */

/* flag kept in jfs_log */
#define logGC_PAGEOUT		0x00000001

/* flags kept in tblock/lbuf */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
0430 
0431 /*
0432  *      log cache buffer header
0433  */
0434 struct lbuf {
0435     struct jfs_log *l_log;  /* 4: log associated with buffer */
0436 
0437     /*
0438      * data buffer base area
0439      */
0440     uint l_flag;        /* 4: pageout control flags */
0441 
0442     struct lbuf *l_wqnext;  /* 4: write queue link */
0443     struct lbuf *l_freelist;    /* 4: freelistlink */
0444 
0445     int l_pn;       /* 4: log page number */
0446     int l_eor;      /* 4: log record eor */
0447     int l_ceor;     /* 4: committed log record eor */
0448 
0449     s64 l_blkno;        /* 8: log page block number */
0450     caddr_t l_ldata;    /* 4: data page */
0451     struct page *l_page;    /* The page itself */
0452     uint l_offset;      /* Offset of l_ldata within the page */
0453 
0454     wait_queue_head_t l_ioevent;    /* 4: i/o done event */
0455 };
0456 
0457 /* Reuse l_freelist for redrive list */
0458 #define l_redrive_next l_freelist
0459 
0460 /*
0461  *  logsynclist block
0462  *
0463  * common logsyncblk prefix for jbuf_t and tblock
0464  */
0465 struct logsyncblk {
0466     u16 xflag;      /* flags */
0467     u16 flag;       /* only meaninful in tblock */
0468     lid_t lid;      /* lock id */
0469     s32 lsn;        /* log sequence number */
0470     struct list_head synclist;  /* log sync list link */
0471 };
0472 
/*
 *	logsynclist serialization (per log)
 *
 * All three macros operate on the synclock spinlock of the log passed
 * in; LOGSYNC_LOCK/LOGSYNC_UNLOCK use the irqsave/irqrestore variants
 * and pass 'flags' straight through to them.
 */
#define LOGSYNC_LOCK_INIT(log)		\
	spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags)	\
	spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags)	\
	spin_unlock_irqrestore(&(log)->synclock, flags)
0481 
/*
 * logdiff - compute the difference in bytes of lsn from sync point
 * @diff: lvalue that receives the result
 * @lsn:  log sequence number to measure
 * @log:  pointer to the log; its syncpt and logsize fields are read
 *
 * diff is set to lsn - log->syncpt; if that is negative, log->logsize
 * is added to it.
 *
 * The body is wrapped in do { } while (0) so that "logdiff(...);" is a
 * single statement and stays correct inside an unbraced if/else.
 * @diff and @log are evaluated more than once, so pass plain variables
 * without side effects.
 */
#define logdiff(diff, lsn, log)				\
do {							\
	(diff) = (lsn) - (log)->syncpt;			\
	if ((diff) < 0)					\
		(diff) += (log)->logsize;		\
} while (0)
0489 
0490 extern int lmLogOpen(struct super_block *sb);
0491 extern int lmLogClose(struct super_block *sb);
0492 extern int lmLogShutdown(struct jfs_log * log);
0493 extern int lmLogInit(struct jfs_log * log);
0494 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
0495 extern int lmGroupCommit(struct jfs_log *, struct tblock *);
0496 extern int jfsIOWait(void *);
0497 extern void jfs_flush_journal(struct jfs_log * log, int wait);
0498 extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
0499 
0500 #endif              /* _H_JFS_LOGMGR */