fs/gfs2/lops.c

0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
0004  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
0005  */
0006
0007 #include <linux/sched.h>
0008 #include <linux/slab.h>
0009 #include <linux/spinlock.h>
0010 #include <linux/completion.h>
0011 #include <linux/buffer_head.h>
0012 #include <linux/mempool.h>
0013 #include <linux/gfs2_ondisk.h>
0014 #include <linux/bio.h>
0015 #include <linux/fs.h>
0016 #include <linux/list_sort.h>
0017 #include <linux/blkdev.h>
0018
0019 #include "bmap.h"
0020 #include "dir.h"
0021 #include "gfs2.h"
0022 #include "incore.h"
0023 #include "inode.h"
0024 #include "glock.h"
0025 #include "glops.h"
0026 #include "log.h"
0027 #include "lops.h"
0028 #include "meta_io.h"
0029 #include "recovery.h"
0030 #include "rgrp.h"
0031 #include "trans.h"
0032 #include "util.h"
0033 #include "trace_gfs2.h"
0034
0035 /**
0036  * gfs2_pin - Pin a buffer in memory
0037  * @sdp: The superblock
0038  * @bh: The buffer to be pinned
0039  *
0040  * The log lock must be held when calling this function
0041  */
0042 void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
0043 {
0044     struct gfs2_bufdata *bd;
0045
0046     BUG_ON(!current->journal_info);
0047
0048     clear_buffer_dirty(bh);
0049     if (test_set_buffer_pinned(bh))
0050         gfs2_assert_withdraw(sdp, 0);
0051     if (!buffer_uptodate(bh))
0052         gfs2_io_error_bh_wd(sdp, bh);
0053     bd = bh->b_private;
0054     /* If this buffer is in the AIL and it has already been written
0055      * to in-place disk block, remove it from the AIL.
0056      */
0057     spin_lock(&sdp->sd_ail_lock);
0058     if (bd->bd_tr)
0059         list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
0060     spin_unlock(&sdp->sd_ail_lock);
0061     get_bh(bh);
0062     atomic_inc(&sdp->sd_log_pinned);
0063     trace_gfs2_pin(bd, 1);
0064 }
0065
0066 static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
0067 {
0068     return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
0069 }
0070
0071 static void maybe_release_space(struct gfs2_bufdata *bd)
0072 {
0073     struct gfs2_glock *gl = bd->bd_gl;
0074     struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
0075     struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
0076     unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
0077     struct gfs2_bitmap *bi = rgd->rd_bits + index;
0078
0079     rgrp_lock_local(rgd);
0080     if (bi->bi_clone == NULL)
0081         goto out;
0082     if (sdp->sd_args.ar_discard)
0083         gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
0084     memcpy(bi->bi_clone + bi->bi_offset,
0085            bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
0086     clear_bit(GBF_FULL, &bi->bi_flags);
0087     rgd->rd_free_clone = rgd->rd_free;
0088     BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
0089     rgd->rd_extfail_pt = rgd->rd_free;
0090
0091 out:
0092     rgrp_unlock_local(rgd);
0093 }
0094
0095 /**
0096  * gfs2_unpin - Unpin a buffer
0097  * @sdp: the filesystem the buffer belongs to
0098  * @bh: The buffer to unpin
0099  * @tr: The system transaction being flushed
0100  */
0101
0102 static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
0103                struct gfs2_trans *tr)
0104 {
0105     struct gfs2_bufdata *bd = bh->b_private;
0106
0107     BUG_ON(!buffer_uptodate(bh));
0108     BUG_ON(!buffer_pinned(bh));
0109
0110     lock_buffer(bh);
0111     mark_buffer_dirty(bh);
0112     clear_buffer_pinned(bh);
0113
0114     if (buffer_is_rgrp(bd))
0115         maybe_release_space(bd);
0116
0117     spin_lock(&sdp->sd_ail_lock);
0118     if (bd->bd_tr) {
0119         list_del(&bd->bd_ail_st_list);
0120         brelse(bh);
0121     } else {
0122         struct gfs2_glock *gl = bd->bd_gl;
0123         list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
0124         atomic_inc(&gl->gl_ail_count);
0125     }
0126     bd->bd_tr = tr;
0127     list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
0128     spin_unlock(&sdp->sd_ail_lock);
0129
0130     clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
0131     trace_gfs2_pin(bd, 0);
0132     unlock_buffer(bh);
0133     atomic_dec(&sdp->sd_log_pinned);
0134 }
0135
0136 void gfs2_log_incr_head(struct gfs2_sbd *sdp)
0137 {
0138     BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
0139            (sdp->sd_log_flush_head != sdp->sd_log_head));
0140
0141     if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
0142         sdp->sd_log_flush_head = 0;
0143 }
0144
0145 u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock)
0146 {
0147     struct gfs2_journal_extent *je;
0148
0149     list_for_each_entry(je, &jd->extent_list, list) {
0150         if (lblock >= je->lblock && lblock < je->lblock + je->blocks)
0151             return je->dblock + lblock - je->lblock;
0152     }
0153
0154     return -1;
0155 }
0156
0157 /**
0158  * gfs2_end_log_write_bh - end log write of pagecache data with buffers
0159  * @sdp: The superblock
0160  * @bvec: The bio_vec
0161  * @error: The i/o status
0162  *
0163  * This finds the relevant buffers and unlocks them and sets the
0164  * error flag according to the status of the i/o request. This is
0165  * used when the log is writing data which has an in-place version
0166  * that is pinned in the pagecache.
0167  */
0168
0169 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
0170                   struct bio_vec *bvec,
0171                   blk_status_t error)
0172 {
0173     struct buffer_head *bh, *next;
0174     struct page *page = bvec->bv_page;
0175     unsigned size;
0176
0177     bh = page_buffers(page);
0178     size = bvec->bv_len;
0179     while (bh_offset(bh) < bvec->bv_offset)
0180         bh = bh->b_this_page;
0181     do {
0182         if (error)
0183             mark_buffer_write_io_error(bh);
0184         unlock_buffer(bh);
0185         next = bh->b_this_page;
0186         size -= bh->b_size;
0187         brelse(bh);
0188         bh = next;
0189     } while(bh && size);
0190 }
0191
0192 /**
0193  * gfs2_end_log_write - end of i/o to the log
0194  * @bio: The bio
0195  *
0196  * Each bio_vec contains either data from the pagecache or data
0197  * relating to the log itself. Here we iterate over the bio_vec
0198  * array, processing both kinds of data.
0199  *
0200  */
0201
0202 static void gfs2_end_log_write(struct bio *bio)
0203 {
0204     struct gfs2_sbd *sdp = bio->bi_private;
0205     struct bio_vec *bvec;
0206     struct page *page;
0207     struct bvec_iter_all iter_all;
0208
0209     if (bio->bi_status) {
0210         if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
0211             fs_err(sdp, "Error %d writing to journal, jid=%u\n",
0212                    bio->bi_status, sdp->sd_jdesc->jd_jid);
0213         gfs2_withdraw_delayed(sdp);
0214         /* prevent more writes to the journal */
0215         clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
0216         wake_up(&sdp->sd_logd_waitq);
0217     }
0218
0219     bio_for_each_segment_all(bvec, bio, iter_all) {
0220         page = bvec->bv_page;
0221         if (page_has_buffers(page))
0222             gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
0223         else
0224             mempool_free(page, gfs2_page_pool);
0225     }
0226
0227     bio_put(bio);
0228     if (atomic_dec_and_test(&sdp->sd_log_in_flight))
0229         wake_up(&sdp->sd_log_flush_wait);
0230 }
0231
0232 /**
0233  * gfs2_log_submit_bio - Submit any pending log bio
0234  * @biop: Address of the bio pointer
0235  * @opf: REQ_OP | op_flags
0236  *
0237  * Submit any pending part-built or full bio to the block device. If
0238  * there is no pending bio, then this is a no-op.
0239  */
0240
0241 void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf)
0242 {
0243     struct bio *bio = *biop;
0244     if (bio) {
0245         struct gfs2_sbd *sdp = bio->bi_private;
0246         atomic_inc(&sdp->sd_log_in_flight);
0247         bio->bi_opf = opf;
0248         submit_bio(bio);
0249         *biop = NULL;
0250     }
0251 }
0252
0253 /**
0254  * gfs2_log_alloc_bio - Allocate a bio
0255  * @sdp: The super block
0256  * @blkno: The device block number we want to write to
0257  * @end_io: The bi_end_io callback
0258  *
0259  * Allocate a new bio, initialize it with the given parameters and return it.
0260  *
0261  * Returns: The newly allocated bio
0262  */
0263
0264 static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
0265                       bio_end_io_t *end_io)
0266 {
0267     struct super_block *sb = sdp->sd_vfs;
0268     struct bio *bio = bio_alloc(sb->s_bdev, BIO_MAX_VECS, 0, GFP_NOIO);
0269
0270     bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
0271     bio->bi_end_io = end_io;
0272     bio->bi_private = sdp;
0273
0274     return bio;
0275 }
0276
0277 /**
0278  * gfs2_log_get_bio - Get cached log bio, or allocate a new one
0279  * @sdp: The super block
0280  * @blkno: The device block number we want to write to
0281  * @biop: The bio to get or allocate
0282  * @op: REQ_OP
0283  * @end_io: The bi_end_io callback
0284  * @flush: Always flush the current bio and allocate a new one?
0285  *
0286  * If there is a cached bio, then if the next block number is sequential
0287  * with the previous one, return it, otherwise flush the bio to the
0288  * device. If there is no cached bio, or we just flushed it, then
0289  * allocate a new one.
0290  *
0291  * Returns: The bio to use for log writes
0292  */
0293
0294 static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
0295                     struct bio **biop, enum req_op op,
0296                     bio_end_io_t *end_io, bool flush)
0297 {
0298     struct bio *bio = *biop;
0299
0300     if (bio) {
0301         u64 nblk;
0302
0303         nblk = bio_end_sector(bio);
0304         nblk >>= sdp->sd_fsb2bb_shift;
0305         if (blkno == nblk && !flush)
0306             return bio;
0307         gfs2_log_submit_bio(biop, op);
0308     }
0309
0310     *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
0311     return *biop;
0312 }
0313
0314 /**
0315  * gfs2_log_write - write to log
0316  * @sdp: the filesystem
0317  * @jd: The journal descriptor
0318  * @page: the page to write
0319  * @size: the size of the data to write
0320  * @offset: the offset within the page
0321  * @blkno: block number of the log entry
0322  *
0323  * Try and add the page segment to the current bio. If that fails,
0324  * submit the current bio to the device and create a new one, and
0325  * then add the page segment to that.
0326  */
0327
0328 void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
0329             struct page *page, unsigned size, unsigned offset,
0330             u64 blkno)
0331 {
0332     struct bio *bio;
0333     int ret;
0334
0335     bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio, REQ_OP_WRITE,
0336                    gfs2_end_log_write, false);
0337     ret = bio_add_page(bio, page, size, offset);
0338     if (ret == 0) {
0339         bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio,
0340                        REQ_OP_WRITE, gfs2_end_log_write, true);
0341         ret = bio_add_page(bio, page, size, offset);
0342         WARN_ON(ret == 0);
0343     }
0344 }
0345
0346 /**
0347  * gfs2_log_write_bh - write a buffer's content to the log
0348  * @sdp: The super block
0349  * @bh: The buffer pointing to the in-place location
0350  *
0351  * This writes the content of the buffer to the next available location
0352  * in the log. The buffer will be unlocked once the i/o to the log has
0353  * completed.
0354  */
0355
0356 static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
0357 {
0358     u64 dblock;
0359
0360     dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
0361     gfs2_log_incr_head(sdp);
0362     gfs2_log_write(sdp, sdp->sd_jdesc, bh->b_page, bh->b_size,
0363                bh_offset(bh), dblock);
0364 }
0365
0366 /**
0367  * gfs2_log_write_page - write one block stored in a page, into the log
0368  * @sdp: The superblock
0369  * @page: The struct page
0370  *
0371  * This writes the first block-sized part of the page into the log. Note
0372  * that the page must have been allocated from the gfs2_page_pool mempool
0373  * and that after this has been called, ownership has been transferred and
0374  * the page may be freed at any time.
0375  */
0376
0377 static void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
0378 {
0379     struct super_block *sb = sdp->sd_vfs;
0380     u64 dblock;
0381
0382     dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
0383     gfs2_log_incr_head(sdp);
0384     gfs2_log_write(sdp, sdp->sd_jdesc, page, sb->s_blocksize, 0, dblock);
0385 }
0386
0387 /**
0388  * gfs2_end_log_read - end I/O callback for reads from the log
0389  * @bio: The bio
0390  *
0391  * Simply unlock the pages in the bio. The main thread will wait on them and
0392  * process them in order as necessary.
0393  */
0394
0395 static void gfs2_end_log_read(struct bio *bio)
0396 {
0397     struct page *page;
0398     struct bio_vec *bvec;
0399     struct bvec_iter_all iter_all;
0400
0401     bio_for_each_segment_all(bvec, bio, iter_all) {
0402         page = bvec->bv_page;
0403         if (bio->bi_status) {
0404             int err = blk_status_to_errno(bio->bi_status);
0405
0406             SetPageError(page);
0407             mapping_set_error(page->mapping, err);
0408         }
0409         unlock_page(page);
0410     }
0411
0412     bio_put(bio);
0413 }
0414
0415 /**
0416  * gfs2_jhead_pg_srch - Look for the journal head in a given page.
0417  * @jd: The journal descriptor
0418  * @head: The journal head to start from
0419  * @page: The page to look in
0420  *
0421  * Returns: 1 if found, 0 otherwise.
0422  */
0423
0424 static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
0425                   struct gfs2_log_header_host *head,
0426                   struct page *page)
0427 {
0428     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0429     struct gfs2_log_header_host lh;
0430     void *kaddr = kmap_atomic(page);
0431     unsigned int offset;
0432     bool ret = false;
0433
0434     for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
0435         if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
0436             if (lh.lh_sequence >= head->lh_sequence)
0437                 *head = lh;
0438             else {
0439                 ret = true;
0440                 break;
0441             }
0442         }
0443     }
0444     kunmap_atomic(kaddr);
0445     return ret;
0446 }
0447
0448 /**
0449  * gfs2_jhead_process_page - Search/cleanup a page
0450  * @jd: The journal descriptor
0451  * @index: Index of the page to look into
0452  * @head: The journal head to start from
0453  * @done: If set, perform only cleanup, else search and set if found.
0454  *
0455  * Find the folio with 'index' in the journal's mapping. Search the folio for
0456  * the journal head if requested (cleanup == false). Release refs on the
0457  * folio so the page cache can reclaim it. We grabbed a
0458  * reference on this folio twice, first when we did a find_or_create_page()
0459  * to obtain the folio to add it to the bio and second when we do a
0460  * filemap_get_folio() here to get the folio to wait on while I/O on it is being
0461  * completed.
0462  * This function is also used to free up a folio we might've grabbed but not
0463  * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
0464  * submitted the I/O, but we already found the jhead so we only need to drop
0465  * our references to the folio.
0466  */
0467
0468 static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
0469                     struct gfs2_log_header_host *head,
0470                     bool *done)
0471 {
0472     struct folio *folio;
0473
0474     folio = filemap_get_folio(jd->jd_inode->i_mapping, index);
0475
0476     folio_wait_locked(folio);
0477     if (folio_test_error(folio))
0478         *done = true;
0479
0480     if (!*done)
0481         *done = gfs2_jhead_pg_srch(jd, head, &folio->page);
0482
0483     /* filemap_get_folio() and the earlier find_or_create_page() */
0484     folio_put_refs(folio, 2);
0485 }
0486
0487 static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
0488 {
0489     struct bio *new;
0490
0491     new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO);
0492     bio_clone_blkg_association(new, prev);
0493     new->bi_iter.bi_sector = bio_end_sector(prev);
0494     bio_chain(new, prev);
0495     submit_bio(prev);
0496     return new;
0497 }
0498
0499 /**
0500  * gfs2_find_jhead - find the head of a log
0501  * @jd: The journal descriptor
0502  * @head: The log descriptor for the head of the log is returned here
0503  * @keep_cache: If set inode pages will not be truncated
0504  *
0505  * Do a search of a journal by reading it in large chunks using bios and find
0506  * the valid log entry with the highest sequence number.  (i.e. the log head)
0507  *
0508  * Returns: 0 on success, errno otherwise
0509  */
0510 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
0511             bool keep_cache)
0512 {
0513     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0514     struct address_space *mapping = jd->jd_inode->i_mapping;
0515     unsigned int block = 0, blocks_submitted = 0, blocks_read = 0;
0516     unsigned int bsize = sdp->sd_sb.sb_bsize, off;
0517     unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
0518     unsigned int shift = PAGE_SHIFT - bsize_shift;
0519     unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift;
0520     struct gfs2_journal_extent *je;
0521     int sz, ret = 0;
0522     struct bio *bio = NULL;
0523     struct page *page = NULL;
0524     bool done = false;
0525     errseq_t since;
0526
0527     memset(head, 0, sizeof(*head));
0528     if (list_empty(&jd->extent_list))
0529         gfs2_map_journal_extents(sdp, jd);
0530
0531     since = filemap_sample_wb_err(mapping);
0532     list_for_each_entry(je, &jd->extent_list, list) {
0533         u64 dblock = je->dblock;
0534
0535         for (; block < je->lblock + je->blocks; block++, dblock++) {
0536             if (!page) {
0537                 page = find_or_create_page(mapping,
0538                         block >> shift, GFP_NOFS);
0539                 if (!page) {
0540                     ret = -ENOMEM;
0541                     done = true;
0542                     goto out;
0543                 }
0544                 off = 0;
0545             }
0546
0547             if (bio && (off || block < blocks_submitted + max_blocks)) {
0548                 sector_t sector = dblock << sdp->sd_fsb2bb_shift;
0549
0550                 if (bio_end_sector(bio) == sector) {
0551                     sz = bio_add_page(bio, page, bsize, off);
0552                     if (sz == bsize)
0553                         goto block_added;
0554                 }
0555                 if (off) {
0556                     unsigned int blocks =
0557                         (PAGE_SIZE - off) >> bsize_shift;
0558
0559                     bio = gfs2_chain_bio(bio, blocks);
0560                     goto add_block_to_new_bio;
0561                 }
0562             }
0563
0564             if (bio) {
0565                 blocks_submitted = block;
0566                 submit_bio(bio);
0567             }
0568
0569             bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read);
0570             bio->bi_opf = REQ_OP_READ;
0571 add_block_to_new_bio:
0572             sz = bio_add_page(bio, page, bsize, off);
0573             BUG_ON(sz != bsize);
0574 block_added:
0575             off += bsize;
0576             if (off == PAGE_SIZE)
0577                 page = NULL;
0578             if (blocks_submitted <= blocks_read + max_blocks) {
0579                 /* Keep at least one bio in flight */
0580                 continue;
0581             }
0582
0583             gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
0584             blocks_read += PAGE_SIZE >> bsize_shift;
0585             if (done)
0586                 goto out;  /* found */
0587         }
0588     }
0589
0590 out:
0591     if (bio)
0592         submit_bio(bio);
0593     while (blocks_read < block) {
0594         gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
0595         blocks_read += PAGE_SIZE >> bsize_shift;
0596     }
0597
0598     if (!ret)
0599         ret = filemap_check_wb_err(mapping, since);
0600
0601     if (!keep_cache)
0602         truncate_inode_pages(mapping, 0);
0603
0604     return ret;
0605 }
0606
0607 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
0608                       u32 ld_length, u32 ld_data1)
0609 {
0610     struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
0611     struct gfs2_log_descriptor *ld = page_address(page);
0612     clear_page(ld);
0613     ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
0614     ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
0615     ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
0616     ld->ld_type = cpu_to_be32(ld_type);
0617     ld->ld_length = cpu_to_be32(ld_length);
0618     ld->ld_data1 = cpu_to_be32(ld_data1);
0619     ld->ld_data2 = 0;
0620     return page;
0621 }
0622
0623 static void gfs2_check_magic(struct buffer_head *bh)
0624 {
0625     void *kaddr;
0626     __be32 *ptr;
0627
0628     clear_buffer_escaped(bh);
0629     kaddr = kmap_atomic(bh->b_page);
0630     ptr = kaddr + bh_offset(bh);
0631     if (*ptr == cpu_to_be32(GFS2_MAGIC))
0632         set_buffer_escaped(bh);
0633     kunmap_atomic(kaddr);
0634 }
0635
0636 static int blocknr_cmp(void *priv, const struct list_head *a,
0637                const struct list_head *b)
0638 {
0639     struct gfs2_bufdata *bda, *bdb;
0640
0641     bda = list_entry(a, struct gfs2_bufdata, bd_list);
0642     bdb = list_entry(b, struct gfs2_bufdata, bd_list);
0643
0644     if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
0645         return -1;
0646     if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
0647         return 1;
0648     return 0;
0649 }
0650
0651 static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
0652                 unsigned int total, struct list_head *blist,
0653                 bool is_databuf)
0654 {
0655     struct gfs2_log_descriptor *ld;
0656     struct gfs2_bufdata *bd1 = NULL, *bd2;
0657     struct page *page;
0658     unsigned int num;
0659     unsigned n;
0660     __be64 *ptr;
0661
0662     gfs2_log_lock(sdp);
0663     list_sort(NULL, blist, blocknr_cmp);
0664     bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
0665     while(total) {
0666         num = total;
0667         if (total > limit)
0668             num = limit;
0669         gfs2_log_unlock(sdp);
0670         page = gfs2_get_log_desc(sdp,
0671                      is_databuf ? GFS2_LOG_DESC_JDATA :
0672                      GFS2_LOG_DESC_METADATA, num + 1, num);
0673         ld = page_address(page);
0674         gfs2_log_lock(sdp);
0675         ptr = (__be64 *)(ld + 1);
0676
0677         n = 0;
0678         list_for_each_entry_continue(bd1, blist, bd_list) {
0679             *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
0680             if (is_databuf) {
0681                 gfs2_check_magic(bd1->bd_bh);
0682                 *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
0683             }
0684             if (++n >= num)
0685                 break;
0686         }
0687
0688         gfs2_log_unlock(sdp);
0689         gfs2_log_write_page(sdp, page);
0690         gfs2_log_lock(sdp);
0691
0692         n = 0;
0693         list_for_each_entry_continue(bd2, blist, bd_list) {
0694             get_bh(bd2->bd_bh);
0695             gfs2_log_unlock(sdp);
0696             lock_buffer(bd2->bd_bh);
0697
0698             if (buffer_escaped(bd2->bd_bh)) {
0699                 void *kaddr;
0700                 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
0701                 ptr = page_address(page);
0702                 kaddr = kmap_atomic(bd2->bd_bh->b_page);
0703                 memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
0704                        bd2->bd_bh->b_size);
0705                 kunmap_atomic(kaddr);
0706                 *(__be32 *)ptr = 0;
0707                 clear_buffer_escaped(bd2->bd_bh);
0708                 unlock_buffer(bd2->bd_bh);
0709                 brelse(bd2->bd_bh);
0710                 gfs2_log_write_page(sdp, page);
0711             } else {
0712                 gfs2_log_write_bh(sdp, bd2->bd_bh);
0713             }
0714             gfs2_log_lock(sdp);
0715             if (++n >= num)
0716                 break;
0717         }
0718
0719         BUG_ON(total < num);
0720         total -= num;
0721     }
0722     gfs2_log_unlock(sdp);
0723 }
0724
0725 static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
0726 {
0727     unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
0728     unsigned int nbuf;
0729     if (tr == NULL)
0730         return;
0731     nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
0732     gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
0733 }
0734
0735 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
0736 {
0737     struct list_head *head;
0738     struct gfs2_bufdata *bd;
0739
0740     if (tr == NULL)
0741         return;
0742
0743     head = &tr->tr_buf;
0744     while (!list_empty(head)) {
0745         bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
0746         list_del_init(&bd->bd_list);
0747         gfs2_unpin(sdp, bd->bd_bh, tr);
0748     }
0749 }
0750
0751 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
0752                    struct gfs2_log_header_host *head, int pass)
0753 {
0754     if (pass != 0)
0755         return;
0756
0757     jd->jd_found_blocks = 0;
0758     jd->jd_replayed_blocks = 0;
0759 }
0760
0761 #define obsolete_rgrp_replay \
0762 "Replaying 0x%llx from jid=%d/0x%llx but we already have a bh!\n"
0763 #define obsolete_rgrp_replay2 \
0764 "busy:%d, pinned:%d rg_gen:0x%llx, j_gen:0x%llx\n"
0765
0766 static void obsolete_rgrp(struct gfs2_jdesc *jd, struct buffer_head *bh_log,
0767               u64 blkno)
0768 {
0769     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0770     struct gfs2_rgrpd *rgd;
0771     struct gfs2_rgrp *jrgd = (struct gfs2_rgrp *)bh_log->b_data;
0772
0773     rgd = gfs2_blk2rgrpd(sdp, blkno, false);
0774     if (rgd && rgd->rd_addr == blkno &&
0775         rgd->rd_bits && rgd->rd_bits->bi_bh) {
0776         fs_info(sdp, obsolete_rgrp_replay, (unsigned long long)blkno,
0777             jd->jd_jid, bh_log->b_blocknr);
0778         fs_info(sdp, obsolete_rgrp_replay2,
0779             buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
0780             buffer_pinned(rgd->rd_bits->bi_bh),
0781             rgd->rd_igeneration,
0782             be64_to_cpu(jrgd->rg_igeneration));
0783         gfs2_dump_glock(NULL, rgd->rd_gl, true);
0784     }
0785 }
0786
0787 static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
0788                 struct gfs2_log_descriptor *ld, __be64 *ptr,
0789                 int pass)
0790 {
0791     struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
0792     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0793     struct gfs2_glock *gl = ip->i_gl;
0794     unsigned int blks = be32_to_cpu(ld->ld_data1);
0795     struct buffer_head *bh_log, *bh_ip;
0796     u64 blkno;
0797     int error = 0;
0798
0799     if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
0800         return 0;
0801
0802     gfs2_replay_incr_blk(jd, &start);
0803
0804     for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
0805         blkno = be64_to_cpu(*ptr++);
0806
0807         jd->jd_found_blocks++;
0808
0809         if (gfs2_revoke_check(jd, blkno, start))
0810             continue;
0811
0812         error = gfs2_replay_read_block(jd, start, &bh_log);
0813         if (error)
0814             return error;
0815
0816         bh_ip = gfs2_meta_new(gl, blkno);
0817         memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
0818
0819         if (gfs2_meta_check(sdp, bh_ip))
0820             error = -EIO;
0821         else {
0822             struct gfs2_meta_header *mh =
0823                 (struct gfs2_meta_header *)bh_ip->b_data;
0824
0825             if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG))
0826                 obsolete_rgrp(jd, bh_log, blkno);
0827
0828             mark_buffer_dirty(bh_ip);
0829         }
0830         brelse(bh_log);
0831         brelse(bh_ip);
0832
0833         if (error)
0834             break;
0835
0836         jd->jd_replayed_blocks++;
0837     }
0838
0839     return error;
0840 }
0841
0842 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
0843 {
0844     struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
0845     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0846
0847     if (error) {
0848         gfs2_inode_metasync(ip->i_gl);
0849         return;
0850     }
0851     if (pass != 1)
0852         return;
0853
0854     gfs2_inode_metasync(ip->i_gl);
0855
0856     fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
0857             jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
0858 }
0859
0860 static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
0861 {
0862     struct gfs2_meta_header *mh;
0863     unsigned int offset;
0864     struct list_head *head = &sdp->sd_log_revokes;
0865     struct gfs2_bufdata *bd;
0866     struct page *page;
0867     unsigned int length;
0868
0869     gfs2_flush_revokes(sdp);
0870     if (!sdp->sd_log_num_revoke)
0871         return;
0872
0873     length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke);
0874     page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
0875     offset = sizeof(struct gfs2_log_descriptor);
0876
0877     list_for_each_entry(bd, head, bd_list) {
0878         sdp->sd_log_num_revoke--;
0879
0880         if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
0881             gfs2_log_write_page(sdp, page);
0882             page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
0883             mh = page_address(page);
0884             clear_page(mh);
0885             mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
0886             mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
0887             mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
0888             offset = sizeof(struct gfs2_meta_header);
0889         }
0890
0891         *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
0892         offset += sizeof(u64);
0893     }
0894     gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
0895
0896     gfs2_log_write_page(sdp, page);
0897 }
0898
0899 void gfs2_drain_revokes(struct gfs2_sbd *sdp)
0900 {
0901     struct list_head *head = &sdp->sd_log_revokes;
0902     struct gfs2_bufdata *bd;
0903     struct gfs2_glock *gl;
0904
0905     while (!list_empty(head)) {
0906         bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
0907         list_del_init(&bd->bd_list);
0908         gl = bd->bd_gl;
0909         gfs2_glock_remove_revoke(gl);
0910         kmem_cache_free(gfs2_bufdata_cachep, bd);
0911     }
0912 }
0913
/* After commit, discard all revokes queued on the superblock; @tr is unused. */
static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
    gfs2_drain_revokes(sdp);
}
0918
0919 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
0920                   struct gfs2_log_header_host *head, int pass)
0921 {
0922     if (pass != 0)
0923         return;
0924
0925     jd->jd_found_revokes = 0;
0926     jd->jd_replay_tail = head->lh_tail;
0927 }
0928
0929 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
0930                    struct gfs2_log_descriptor *ld, __be64 *ptr,
0931                    int pass)
0932 {
0933     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0934     unsigned int blks = be32_to_cpu(ld->ld_length);
0935     unsigned int revokes = be32_to_cpu(ld->ld_data1);
0936     struct buffer_head *bh;
0937     unsigned int offset;
0938     u64 blkno;
0939     int first = 1;
0940     int error;
0941
0942     if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
0943         return 0;
0944
0945     offset = sizeof(struct gfs2_log_descriptor);
0946
0947     for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
0948         error = gfs2_replay_read_block(jd, start, &bh);
0949         if (error)
0950             return error;
0951
0952         if (!first)
0953             gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
0954
0955         while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
0956             blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
0957
0958             error = gfs2_revoke_add(jd, blkno, start);
0959             if (error < 0) {
0960                 brelse(bh);
0961                 return error;
0962             }
0963             else if (error)
0964                 jd->jd_found_revokes++;
0965
0966             if (!--revokes)
0967                 break;
0968             offset += sizeof(u64);
0969         }
0970
0971         brelse(bh);
0972         offset = sizeof(struct gfs2_meta_header);
0973         first = 0;
0974     }
0975
0976     return 0;
0977 }
0978
0979 static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
0980 {
0981     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0982
0983     if (error) {
0984         gfs2_revoke_clean(jd);
0985         return;
0986     }
0987     if (pass != 1)
0988         return;
0989
0990     fs_info(sdp, "jid=%u: Found %u revoke tags\n",
0991             jd->jd_jid, jd->jd_found_revokes);
0992
0993     gfs2_revoke_clean(jd);
0994 }
0995
0996 /**
0997  * databuf_lo_before_commit - Scan the data buffers, writing as we go
0998  * @sdp: The filesystem
0999  * @tr: The system transaction being flushed
1000  */
1001
1002 static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
1003 {
1004     unsigned int limit = databuf_limit(sdp);
1005     unsigned int nbuf;
1006     if (tr == NULL)
1007         return;
1008     nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
1009     gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
1010 }
1011
1012 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
1013                     struct gfs2_log_descriptor *ld,
1014                     __be64 *ptr, int pass)
1015 {
1016     struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1017     struct gfs2_glock *gl = ip->i_gl;
1018     unsigned int blks = be32_to_cpu(ld->ld_data1);
1019     struct buffer_head *bh_log, *bh_ip;
1020     u64 blkno;
1021     u64 esc;
1022     int error = 0;
1023
1024     if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
1025         return 0;
1026
1027     gfs2_replay_incr_blk(jd, &start);
1028     for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
1029         blkno = be64_to_cpu(*ptr++);
1030         esc = be64_to_cpu(*ptr++);
1031
1032         jd->jd_found_blocks++;
1033
1034         if (gfs2_revoke_check(jd, blkno, start))
1035             continue;
1036
1037         error = gfs2_replay_read_block(jd, start, &bh_log);
1038         if (error)
1039             return error;
1040
1041         bh_ip = gfs2_meta_new(gl, blkno);
1042         memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
1043
1044         /* Unescape */
1045         if (esc) {
1046             __be32 *eptr = (__be32 *)bh_ip->b_data;
1047             *eptr = cpu_to_be32(GFS2_MAGIC);
1048         }
1049         mark_buffer_dirty(bh_ip);
1050
1051         brelse(bh_log);
1052         brelse(bh_ip);
1053
1054         jd->jd_replayed_blocks++;
1055     }
1056
1057     return error;
1058 }
1059
1060 /* FIXME: sort out accounting for log blocks etc. */
1061
1062 static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
1063 {
1064     struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1065     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
1066
1067     if (error) {
1068         gfs2_inode_metasync(ip->i_gl);
1069         return;
1070     }
1071     if (pass != 1)
1072         return;
1073
1074     /* data sync? */
1075     gfs2_inode_metasync(ip->i_gl);
1076
1077     fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
1078         jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
1079 }
1080
1081 static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
1082 {
1083     struct list_head *head;
1084     struct gfs2_bufdata *bd;
1085
1086     if (tr == NULL)
1087         return;
1088
1089     head = &tr->tr_databuf;
1090     while (!list_empty(head)) {
1091         bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
1092         list_del_init(&bd->bd_list);
1093         gfs2_unpin(sdp, bd->bd_bh, tr);
1094     }
1095 }
1096
1097
1098 static const struct gfs2_log_operations gfs2_buf_lops = {
1099     .lo_before_commit = buf_lo_before_commit,
1100     .lo_after_commit = buf_lo_after_commit,
1101     .lo_before_scan = buf_lo_before_scan,
1102     .lo_scan_elements = buf_lo_scan_elements,
1103     .lo_after_scan = buf_lo_after_scan,
1104     .lo_name = "buf",
1105 };
1106
1107 static const struct gfs2_log_operations gfs2_revoke_lops = {
1108     .lo_before_commit = revoke_lo_before_commit,
1109     .lo_after_commit = revoke_lo_after_commit,
1110     .lo_before_scan = revoke_lo_before_scan,
1111     .lo_scan_elements = revoke_lo_scan_elements,
1112     .lo_after_scan = revoke_lo_after_scan,
1113     .lo_name = "revoke",
1114 };
1115
1116 static const struct gfs2_log_operations gfs2_databuf_lops = {
1117     .lo_before_commit = databuf_lo_before_commit,
1118     .lo_after_commit = databuf_lo_after_commit,
1119     .lo_scan_elements = databuf_lo_scan_elements,
1120     .lo_after_scan = databuf_lo_after_scan,
1121     .lo_name = "databuf",
1122 };
1123
1124 const struct gfs2_log_operations *gfs2_log_ops[] = {
1125     &gfs2_databuf_lops,
1126     &gfs2_buf_lops,
1127     &gfs2_revoke_lops,
1128     NULL,
1129 };
1130