Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * Meta data file for NILFS
0004  *
0005  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
0006  *
0007  * Written by Ryusuke Konishi.
0008  */
0009 
0010 #include <linux/buffer_head.h>
0011 #include <linux/mpage.h>
0012 #include <linux/mm.h>
0013 #include <linux/writeback.h>
0014 #include <linux/backing-dev.h>
0015 #include <linux/swap.h>
0016 #include <linux/slab.h>
0017 #include "nilfs.h"
0018 #include "btnode.h"
0019 #include "segment.h"
0020 #include "page.h"
0021 #include "mdt.h"
0022 #include "alloc.h"      /* nilfs_palloc_destroy_cache() */
0023 
0024 #include <trace/events/nilfs2.h>
0025 
/*
 * Maximum number of blocks following the requested one for which
 * nilfs_mdt_read_block() issues read-ahead requests.
 */
#define NILFS_MDT_MAX_RA_BLOCKS     (16 - 1)
0027 
0028 
0029 static int
0030 nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
0031                struct buffer_head *bh,
0032                void (*init_block)(struct inode *,
0033                           struct buffer_head *, void *))
0034 {
0035     struct nilfs_inode_info *ii = NILFS_I(inode);
0036     void *kaddr;
0037     int ret;
0038 
0039     /* Caller exclude read accesses using page lock */
0040 
0041     /* set_buffer_new(bh); */
0042     bh->b_blocknr = 0;
0043 
0044     ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
0045     if (unlikely(ret))
0046         return ret;
0047 
0048     set_buffer_mapped(bh);
0049 
0050     kaddr = kmap_atomic(bh->b_page);
0051     memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
0052     if (init_block)
0053         init_block(inode, bh, kaddr);
0054     flush_dcache_page(bh->b_page);
0055     kunmap_atomic(kaddr);
0056 
0057     set_buffer_uptodate(bh);
0058     mark_buffer_dirty(bh);
0059     nilfs_mdt_mark_dirty(inode);
0060 
0061     trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);
0062 
0063     return 0;
0064 }
0065 
0066 static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
0067                   struct buffer_head **out_bh,
0068                   void (*init_block)(struct inode *,
0069                              struct buffer_head *,
0070                              void *))
0071 {
0072     struct super_block *sb = inode->i_sb;
0073     struct nilfs_transaction_info ti;
0074     struct buffer_head *bh;
0075     int err;
0076 
0077     nilfs_transaction_begin(sb, &ti, 0);
0078 
0079     err = -ENOMEM;
0080     bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
0081     if (unlikely(!bh))
0082         goto failed_unlock;
0083 
0084     err = -EEXIST;
0085     if (buffer_uptodate(bh))
0086         goto failed_bh;
0087 
0088     wait_on_buffer(bh);
0089     if (buffer_uptodate(bh))
0090         goto failed_bh;
0091 
0092     bh->b_bdev = sb->s_bdev;
0093     err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
0094     if (likely(!err)) {
0095         get_bh(bh);
0096         *out_bh = bh;
0097     }
0098 
0099  failed_bh:
0100     unlock_page(bh->b_page);
0101     put_page(bh->b_page);
0102     brelse(bh);
0103 
0104  failed_unlock:
0105     if (likely(!err))
0106         err = nilfs_transaction_commit(sb);
0107     else
0108         nilfs_transaction_abort(sb);
0109 
0110     return err;
0111 }
0112 
0113 static int
0114 nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf,
0115                struct buffer_head **out_bh)
0116 {
0117     struct buffer_head *bh;
0118     __u64 blknum = 0;
0119     int ret = -ENOMEM;
0120 
0121     bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
0122     if (unlikely(!bh))
0123         goto failed;
0124 
0125     ret = -EEXIST; /* internal code */
0126     if (buffer_uptodate(bh))
0127         goto out;
0128 
0129     if (opf & REQ_RAHEAD) {
0130         if (!trylock_buffer(bh)) {
0131             ret = -EBUSY;
0132             goto failed_bh;
0133         }
0134     } else /* opf == REQ_OP_READ */
0135         lock_buffer(bh);
0136 
0137     if (buffer_uptodate(bh)) {
0138         unlock_buffer(bh);
0139         goto out;
0140     }
0141 
0142     ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
0143     if (unlikely(ret)) {
0144         unlock_buffer(bh);
0145         goto failed_bh;
0146     }
0147     map_bh(bh, inode->i_sb, (sector_t)blknum);
0148 
0149     bh->b_end_io = end_buffer_read_sync;
0150     get_bh(bh);
0151     submit_bh(opf, bh);
0152     ret = 0;
0153 
0154     trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff,
0155                       opf & REQ_OP_MASK);
0156  out:
0157     get_bh(bh);
0158     *out_bh = bh;
0159 
0160  failed_bh:
0161     unlock_page(bh->b_page);
0162     put_page(bh->b_page);
0163     brelse(bh);
0164  failed:
0165     return ret;
0166 }
0167 
0168 static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
0169                 int readahead, struct buffer_head **out_bh)
0170 {
0171     struct buffer_head *first_bh, *bh;
0172     unsigned long blkoff;
0173     int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
0174     int err;
0175 
0176     err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, &first_bh);
0177     if (err == -EEXIST) /* internal code */
0178         goto out;
0179 
0180     if (unlikely(err))
0181         goto failed;
0182 
0183     if (readahead) {
0184         blkoff = block + 1;
0185         for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
0186             err = nilfs_mdt_submit_block(inode, blkoff,
0187                         REQ_OP_READ | REQ_RAHEAD, &bh);
0188             if (likely(!err || err == -EEXIST))
0189                 brelse(bh);
0190             else if (err != -EBUSY)
0191                 break;
0192                 /* abort readahead if bmap lookup failed */
0193             if (!buffer_locked(first_bh))
0194                 goto out_no_wait;
0195         }
0196     }
0197 
0198     wait_on_buffer(first_bh);
0199 
0200  out_no_wait:
0201     err = -EIO;
0202     if (!buffer_uptodate(first_bh)) {
0203         nilfs_err(inode->i_sb,
0204               "I/O error reading meta-data file (ino=%lu, block-offset=%lu)",
0205               inode->i_ino, block);
0206         goto failed_bh;
0207     }
0208  out:
0209     *out_bh = first_bh;
0210     return 0;
0211 
0212  failed_bh:
0213     brelse(first_bh);
0214  failed:
0215     return err;
0216 }
0217 
0218 /**
0219  * nilfs_mdt_get_block - read or create a buffer on meta data file.
0220  * @inode: inode of the meta data file
0221  * @blkoff: block offset
0222  * @create: create flag
0223  * @init_block: initializer used for newly allocated block
0224  * @out_bh: output of a pointer to the buffer_head
0225  *
0226  * nilfs_mdt_get_block() looks up the specified buffer and tries to create
0227  * a new buffer if @create is not zero.  On success, the returned buffer is
0228  * assured to be either existing or formatted using a buffer lock on success.
0229  * @out_bh is substituted only when zero is returned.
0230  *
0231  * Return Value: On success, it returns 0. On error, the following negative
0232  * error code is returned.
0233  *
0234  * %-ENOMEM - Insufficient memory available.
0235  *
0236  * %-EIO - I/O error
0237  *
0238  * %-ENOENT - the specified block does not exist (hole block)
0239  *
0240  * %-EROFS - Read only filesystem (for create mode)
0241  */
0242 int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
0243             void (*init_block)(struct inode *,
0244                        struct buffer_head *, void *),
0245             struct buffer_head **out_bh)
0246 {
0247     int ret;
0248 
0249     /* Should be rewritten with merging nilfs_mdt_read_block() */
0250  retry:
0251     ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
0252     if (!create || ret != -ENOENT)
0253         return ret;
0254 
0255     ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block);
0256     if (unlikely(ret == -EEXIST)) {
0257         /* create = 0; */  /* limit read-create loop retries */
0258         goto retry;
0259     }
0260     return ret;
0261 }
0262 
0263 /**
0264  * nilfs_mdt_find_block - find and get a buffer on meta data file.
0265  * @inode: inode of the meta data file
0266  * @start: start block offset (inclusive)
0267  * @end: end block offset (inclusive)
0268  * @blkoff: block offset
0269  * @out_bh: place to store a pointer to buffer_head struct
0270  *
0271  * nilfs_mdt_find_block() looks up an existing block in range of
0272  * [@start, @end] and stores pointer to a buffer head of the block to
0273  * @out_bh, and block offset to @blkoff, respectively.  @out_bh and
0274  * @blkoff are substituted only when zero is returned.
0275  *
0276  * Return Value: On success, it returns 0. On error, the following negative
0277  * error code is returned.
0278  *
0279  * %-ENOMEM - Insufficient memory available.
0280  *
0281  * %-EIO - I/O error
0282  *
0283  * %-ENOENT - no block was found in the range
0284  */
0285 int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
0286              unsigned long end, unsigned long *blkoff,
0287              struct buffer_head **out_bh)
0288 {
0289     __u64 next;
0290     int ret;
0291 
0292     if (unlikely(start > end))
0293         return -ENOENT;
0294 
0295     ret = nilfs_mdt_read_block(inode, start, true, out_bh);
0296     if (!ret) {
0297         *blkoff = start;
0298         goto out;
0299     }
0300     if (unlikely(ret != -ENOENT || start == ULONG_MAX))
0301         goto out;
0302 
0303     ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
0304     if (!ret) {
0305         if (next <= end) {
0306             ret = nilfs_mdt_read_block(inode, next, true, out_bh);
0307             if (!ret)
0308                 *blkoff = next;
0309         } else {
0310             ret = -ENOENT;
0311         }
0312     }
0313 out:
0314     return ret;
0315 }
0316 
0317 /**
0318  * nilfs_mdt_delete_block - make a hole on the meta data file.
0319  * @inode: inode of the meta data file
0320  * @block: block offset
0321  *
0322  * Return Value: On success, zero is returned.
0323  * On error, one of the following negative error code is returned.
0324  *
0325  * %-ENOMEM - Insufficient memory available.
0326  *
0327  * %-EIO - I/O error
0328  */
0329 int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
0330 {
0331     struct nilfs_inode_info *ii = NILFS_I(inode);
0332     int err;
0333 
0334     err = nilfs_bmap_delete(ii->i_bmap, block);
0335     if (!err || err == -ENOENT) {
0336         nilfs_mdt_mark_dirty(inode);
0337         nilfs_mdt_forget_block(inode, block);
0338     }
0339     return err;
0340 }
0341 
0342 /**
0343  * nilfs_mdt_forget_block - discard dirty state and try to remove the page
0344  * @inode: inode of the meta data file
0345  * @block: block offset
0346  *
0347  * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
0348  * tries to release the page including the buffer from a page cache.
0349  *
0350  * Return Value: On success, 0 is returned. On error, one of the following
0351  * negative error code is returned.
0352  *
0353  * %-EBUSY - page has an active buffer.
0354  *
0355  * %-ENOENT - page cache has no page addressed by the offset.
0356  */
0357 int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
0358 {
0359     pgoff_t index = (pgoff_t)block >>
0360         (PAGE_SHIFT - inode->i_blkbits);
0361     struct page *page;
0362     unsigned long first_block;
0363     int ret = 0;
0364     int still_dirty;
0365 
0366     page = find_lock_page(inode->i_mapping, index);
0367     if (!page)
0368         return -ENOENT;
0369 
0370     wait_on_page_writeback(page);
0371 
0372     first_block = (unsigned long)index <<
0373         (PAGE_SHIFT - inode->i_blkbits);
0374     if (page_has_buffers(page)) {
0375         struct buffer_head *bh;
0376 
0377         bh = nilfs_page_get_nth_block(page, block - first_block);
0378         nilfs_forget_buffer(bh);
0379     }
0380     still_dirty = PageDirty(page);
0381     unlock_page(page);
0382     put_page(page);
0383 
0384     if (still_dirty ||
0385         invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
0386         ret = -EBUSY;
0387     return ret;
0388 }
0389 
0390 int nilfs_mdt_fetch_dirty(struct inode *inode)
0391 {
0392     struct nilfs_inode_info *ii = NILFS_I(inode);
0393 
0394     if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) {
0395         set_bit(NILFS_I_DIRTY, &ii->i_state);
0396         return 1;
0397     }
0398     return test_bit(NILFS_I_DIRTY, &ii->i_state);
0399 }
0400 
0401 static int
0402 nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
0403 {
0404     struct inode *inode = page->mapping->host;
0405     struct super_block *sb;
0406     int err = 0;
0407 
0408     if (inode && sb_rdonly(inode->i_sb)) {
0409         /*
0410          * It means that filesystem was remounted in read-only
0411          * mode because of error or metadata corruption. But we
0412          * have dirty pages that try to be flushed in background.
0413          * So, here we simply discard this dirty page.
0414          */
0415         nilfs_clear_dirty_page(page, false);
0416         unlock_page(page);
0417         return -EROFS;
0418     }
0419 
0420     redirty_page_for_writepage(wbc, page);
0421     unlock_page(page);
0422 
0423     if (!inode)
0424         return 0;
0425 
0426     sb = inode->i_sb;
0427 
0428     if (wbc->sync_mode == WB_SYNC_ALL)
0429         err = nilfs_construct_segment(sb);
0430     else if (wbc->for_reclaim)
0431         nilfs_flush_segment(sb, inode->i_ino);
0432 
0433     return err;
0434 }
0435 
0436 
0437 static const struct address_space_operations def_mdt_aops = {
0438     .dirty_folio        = block_dirty_folio,
0439     .invalidate_folio   = block_invalidate_folio,
0440     .writepage      = nilfs_mdt_write_page,
0441 };
0442 
0443 static const struct inode_operations def_mdt_iops;
0444 static const struct file_operations def_mdt_fops;
0445 
0446 
0447 int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
0448 {
0449     struct nilfs_mdt_info *mi;
0450 
0451     mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
0452     if (!mi)
0453         return -ENOMEM;
0454 
0455     init_rwsem(&mi->mi_sem);
0456     inode->i_private = mi;
0457 
0458     inode->i_mode = S_IFREG;
0459     mapping_set_gfp_mask(inode->i_mapping, gfp_mask);
0460 
0461     inode->i_op = &def_mdt_iops;
0462     inode->i_fop = &def_mdt_fops;
0463     inode->i_mapping->a_ops = &def_mdt_aops;
0464 
0465     return 0;
0466 }
0467 
0468 /**
0469  * nilfs_mdt_clear - do cleanup for the metadata file
0470  * @inode: inode of the metadata file
0471  */
0472 void nilfs_mdt_clear(struct inode *inode)
0473 {
0474     struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
0475     struct nilfs_shadow_map *shadow = mdi->mi_shadow;
0476 
0477     if (mdi->mi_palloc_cache)
0478         nilfs_palloc_destroy_cache(inode);
0479 
0480     if (shadow) {
0481         struct inode *s_inode = shadow->inode;
0482 
0483         shadow->inode = NULL;
0484         iput(s_inode);
0485         mdi->mi_shadow = NULL;
0486     }
0487 }
0488 
0489 /**
0490  * nilfs_mdt_destroy - release resources used by the metadata file
0491  * @inode: inode of the metadata file
0492  */
0493 void nilfs_mdt_destroy(struct inode *inode)
0494 {
0495     struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
0496 
0497     kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
0498     kfree(mdi);
0499 }
0500 
0501 void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
0502                   unsigned int header_size)
0503 {
0504     struct nilfs_mdt_info *mi = NILFS_MDT(inode);
0505 
0506     mi->mi_entry_size = entry_size;
0507     mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
0508     mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
0509 }
0510 
0511 /**
0512  * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
0513  * @inode: inode of the metadata file
0514  * @shadow: shadow mapping
0515  */
0516 int nilfs_mdt_setup_shadow_map(struct inode *inode,
0517                    struct nilfs_shadow_map *shadow)
0518 {
0519     struct nilfs_mdt_info *mi = NILFS_MDT(inode);
0520     struct inode *s_inode;
0521 
0522     INIT_LIST_HEAD(&shadow->frozen_buffers);
0523 
0524     s_inode = nilfs_iget_for_shadow(inode);
0525     if (IS_ERR(s_inode))
0526         return PTR_ERR(s_inode);
0527 
0528     shadow->inode = s_inode;
0529     mi->mi_shadow = shadow;
0530     return 0;
0531 }
0532 
0533 /**
0534  * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
0535  * @inode: inode of the metadata file
0536  */
0537 int nilfs_mdt_save_to_shadow_map(struct inode *inode)
0538 {
0539     struct nilfs_mdt_info *mi = NILFS_MDT(inode);
0540     struct nilfs_inode_info *ii = NILFS_I(inode);
0541     struct nilfs_shadow_map *shadow = mi->mi_shadow;
0542     struct inode *s_inode = shadow->inode;
0543     int ret;
0544 
0545     ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping);
0546     if (ret)
0547         goto out;
0548 
0549     ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping,
0550                      ii->i_assoc_inode->i_mapping);
0551     if (ret)
0552         goto out;
0553 
0554     nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store);
0555  out:
0556     return ret;
0557 }
0558 
0559 int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
0560 {
0561     struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
0562     struct buffer_head *bh_frozen;
0563     struct page *page;
0564     int blkbits = inode->i_blkbits;
0565 
0566     page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index);
0567     if (!page)
0568         return -ENOMEM;
0569 
0570     if (!page_has_buffers(page))
0571         create_empty_buffers(page, 1 << blkbits, 0);
0572 
0573     bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
0574 
0575     if (!buffer_uptodate(bh_frozen))
0576         nilfs_copy_buffer(bh_frozen, bh);
0577     if (list_empty(&bh_frozen->b_assoc_buffers)) {
0578         list_add_tail(&bh_frozen->b_assoc_buffers,
0579                   &shadow->frozen_buffers);
0580         set_buffer_nilfs_redirected(bh);
0581     } else {
0582         brelse(bh_frozen); /* already frozen */
0583     }
0584 
0585     unlock_page(page);
0586     put_page(page);
0587     return 0;
0588 }
0589 
0590 struct buffer_head *
0591 nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
0592 {
0593     struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
0594     struct buffer_head *bh_frozen = NULL;
0595     struct page *page;
0596     int n;
0597 
0598     page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index);
0599     if (page) {
0600         if (page_has_buffers(page)) {
0601             n = bh_offset(bh) >> inode->i_blkbits;
0602             bh_frozen = nilfs_page_get_nth_block(page, n);
0603         }
0604         unlock_page(page);
0605         put_page(page);
0606     }
0607     return bh_frozen;
0608 }
0609 
0610 static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow)
0611 {
0612     struct list_head *head = &shadow->frozen_buffers;
0613     struct buffer_head *bh;
0614 
0615     while (!list_empty(head)) {
0616         bh = list_first_entry(head, struct buffer_head,
0617                       b_assoc_buffers);
0618         list_del_init(&bh->b_assoc_buffers);
0619         brelse(bh); /* drop ref-count to make it releasable */
0620     }
0621 }
0622 
0623 /**
0624  * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state
0625  * @inode: inode of the metadata file
0626  */
0627 void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
0628 {
0629     struct nilfs_mdt_info *mi = NILFS_MDT(inode);
0630     struct nilfs_inode_info *ii = NILFS_I(inode);
0631     struct nilfs_shadow_map *shadow = mi->mi_shadow;
0632 
0633     down_write(&mi->mi_sem);
0634 
0635     if (mi->mi_palloc_cache)
0636         nilfs_palloc_clear_cache(inode);
0637 
0638     nilfs_clear_dirty_pages(inode->i_mapping, true);
0639     nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping);
0640 
0641     nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true);
0642     nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
0643                   NILFS_I(shadow->inode)->i_assoc_inode->i_mapping);
0644 
0645     nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);
0646 
0647     up_write(&mi->mi_sem);
0648 }
0649 
0650 /**
0651  * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches
0652  * @inode: inode of the metadata file
0653  */
0654 void nilfs_mdt_clear_shadow_map(struct inode *inode)
0655 {
0656     struct nilfs_mdt_info *mi = NILFS_MDT(inode);
0657     struct nilfs_shadow_map *shadow = mi->mi_shadow;
0658     struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode;
0659 
0660     down_write(&mi->mi_sem);
0661     nilfs_release_frozen_buffers(shadow);
0662     truncate_inode_pages(shadow->inode->i_mapping, 0);
0663     truncate_inode_pages(shadow_btnc_inode->i_mapping, 0);
0664     up_write(&mi->mi_sem);
0665 }