0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
0004  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
0005  */
0006 
0007 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0008 
0009 #include <linux/slab.h>
0010 #include <linux/spinlock.h>
0011 #include <linux/completion.h>
0012 #include <linux/buffer_head.h>
0013 #include <linux/fs.h>
0014 #include <linux/gfs2_ondisk.h>
0015 #include <linux/prefetch.h>
0016 #include <linux/blkdev.h>
0017 #include <linux/rbtree.h>
0018 #include <linux/random.h>
0019 
0020 #include "gfs2.h"
0021 #include "incore.h"
0022 #include "glock.h"
0023 #include "glops.h"
0024 #include "lops.h"
0025 #include "meta_io.h"
0026 #include "quota.h"
0027 #include "rgrp.h"
0028 #include "super.h"
0029 #include "trans.h"
0030 #include "util.h"
0031 #include "log.h"
0032 #include "inode.h"
0033 #include "trace_gfs2.h"
0034 #include "dir.h"
0035 
0036 #define BFITNOENT ((u32)~0)
0037 #define NO_BLOCK ((u64)~0)
0038 
0039 struct gfs2_rbm {
0040     struct gfs2_rgrpd *rgd;
0041     u32 offset;     /* The offset is bitmap relative */
0042     int bii;        /* Bitmap index */
0043 };
0044 
0045 static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
0046 {
0047     return rbm->rgd->rd_bits + rbm->bii;
0048 }
0049 
0050 static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
0051 {
0052     BUG_ON(rbm->offset >= rbm->rgd->rd_data);
0053     return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
0054         rbm->offset;
0055 }
0056 
0057 /*
0058  * These routines are used by the resource group routines (rgrp.c)
0059  * to keep track of block allocation.  Each block is represented by two
0060  * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
0061  *
0062  * 0 = Free
0063  * 1 = Used (not metadata)
0064  * 2 = Unlinked (still in use) inode
0065  * 3 = Used (metadata)
0066  */
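
/*
 * Worked example (annotation, not part of rgrp.c): the byte 0x6C =
 * 0b01101100 describes four blocks, least significant bit-pair first:
 *
 *   bits 0-1 = 00 -> block 0 is free
 *   bits 2-3 = 11 -> block 1 is used (metadata)
 *   bits 4-5 = 10 -> block 2 is an unlinked inode
 *   bits 6-7 = 01 -> block 3 is used (not metadata)
 */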
0067 
0068 struct gfs2_extent {
0069     struct gfs2_rbm rbm;
0070     u32 len;
0071 };
0072 
0073 static const char valid_change[16] = {
0074             /* current */
0075     /* n */ 0, 1, 1, 1,
0076     /* e */ 1, 0, 0, 0,
0077     /* w */ 0, 0, 0, 1,
0078             1, 0, 0, 0
0079 };
0080 
0081 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
0082              struct gfs2_blkreserv *rs, bool nowrap);
0083 
0084 
0085 /**
0086  * gfs2_setbit - Set a bit in the bitmaps
0087  * @rbm: The position of the bit to set
0088  * @do_clone: Also set the clone bitmap, if it exists
0089  * @new_state: the new state of the block
0090  *
0091  */
0092 
0093 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
0094                    unsigned char new_state)
0095 {
0096     unsigned char *byte1, *byte2, *end, cur_state;
0097     struct gfs2_bitmap *bi = rbm_bi(rbm);
0098     unsigned int buflen = bi->bi_bytes;
0099     const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
0100 
0101     byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
0102     end = bi->bi_bh->b_data + bi->bi_offset + buflen;
0103 
0104     BUG_ON(byte1 >= end);
0105 
0106     cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
0107 
0108     if (unlikely(!valid_change[new_state * 4 + cur_state])) {
0109         struct gfs2_sbd *sdp = rbm->rgd->rd_sbd;
0110 
0111         fs_warn(sdp, "buf_blk = 0x%x old_state=%d, new_state=%d\n",
0112             rbm->offset, cur_state, new_state);
0113         fs_warn(sdp, "rgrp=0x%llx bi_start=0x%x biblk: 0x%llx\n",
0114             (unsigned long long)rbm->rgd->rd_addr, bi->bi_start,
0115             (unsigned long long)bi->bi_bh->b_blocknr);
0116         fs_warn(sdp, "bi_offset=0x%x bi_bytes=0x%x block=0x%llx\n",
0117             bi->bi_offset, bi->bi_bytes,
0118             (unsigned long long)gfs2_rbm_to_block(rbm));
0119         dump_stack();
0120         gfs2_consist_rgrpd(rbm->rgd);
0121         return;
0122     }
0123     *byte1 ^= (cur_state ^ new_state) << bit;
0124 
0125     if (do_clone && bi->bi_clone) {
0126         byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
0127         cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
0128         *byte2 ^= (cur_state ^ new_state) << bit;
0129     }
0130 }
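
/*
 * Annotation (not part of rgrp.c): a minimal userspace sketch of the
 * two-bit update performed by gfs2_setbit() above, assuming the same
 * four-blocks-per-byte layout; the helper name is hypothetical and the
 * valid_change sanity check is omitted.
 */
#include <stdint.h>

static inline void example_set_2bit(uint8_t *byte, unsigned int block,
                                    uint8_t new_state)
{
    unsigned int bit = (block % 4) * 2;      /* GFS2_NBBY, GFS2_BIT_SIZE */
    uint8_t cur_state = (*byte >> bit) & 3;  /* GFS2_BIT_MASK */

    /* XORing with (cur ^ new) flips exactly the bits that differ. */
    *byte ^= (cur_state ^ new_state) << bit;
}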
0131 
0132 /**
0133  * gfs2_testbit - test a bit in the bitmaps
0134  * @rbm: The bit to test
0135  * @use_clone: If true, test the clone bitmap, not the official bitmap.
0136  *
0137  * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
0138  * not the "real" bitmaps, to avoid allocating recently freed blocks.
0139  *
0140  * Returns: The two bit block state of the requested bit
0141  */
0142 
0143 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
0144 {
0145     struct gfs2_bitmap *bi = rbm_bi(rbm);
0146     const u8 *buffer;
0147     const u8 *byte;
0148     unsigned int bit;
0149 
0150     if (use_clone && bi->bi_clone)
0151         buffer = bi->bi_clone;
0152     else
0153         buffer = bi->bi_bh->b_data;
0154     buffer += bi->bi_offset;
0155     byte = buffer + (rbm->offset / GFS2_NBBY);
0156     bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
0157 
0158     return (*byte >> bit) & GFS2_BIT_MASK;
0159 }
0160 
0161 /**
0162  * gfs2_bit_search
0163  * @ptr: Pointer to bitmap data
0164  * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
0165  * @state: The state we are searching for
0166  *
0167  * We XOR the bitmap data with a pattern which is the bitwise opposite
0168  * of what we are looking for; this gives rise to a pattern of ones
0169  * wherever there is a match. Since we have two bits per entry, we
0170  * take this pattern, shift it down by one place and then AND it with
0171  * the original. All the even bit positions (0,2,4, etc) then represent
0172  * successful matches, so we mask with 0x55555..... to remove the unwanted
0173  * odd bit positions.
0174  *
0175  * This allows searching of a whole u64 at once (32 blocks) with a
0176  * single test (on 64 bit arches).
0177  */
0178 
0179 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
0180 {
0181     u64 tmp;
0182     static const u64 search[] = {
0183         [0] = 0xffffffffffffffffULL,
0184         [1] = 0xaaaaaaaaaaaaaaaaULL,
0185         [2] = 0x5555555555555555ULL,
0186         [3] = 0x0000000000000000ULL,
0187     };
0188     tmp = le64_to_cpu(*ptr) ^ search[state];
0189     tmp &= (tmp >> 1);
0190     tmp &= mask;
0191     return tmp;
0192 }
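
/*
 * Worked example (annotation, not part of rgrp.c): searching the byte
 * 0x31 = 0b00110001 for state 0 (free). XOR with the state-0 pattern
 * 0xff gives 0b11001110; ANDing that with itself shifted right by one
 * gives 0b01000110; masking with 0x55 leaves 0b01000100, i.e. hits at
 * bit-pairs 1 and 3, which are exactly the two free blocks. __ffs64()
 * then finds bit 2, and dividing by two yields block 1 as the first
 * match.
 */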
0193 
0194 /**
0195  * rs_cmp - multi-block reservation range compare
0196  * @start: start of the new reservation
0197  * @len: number of blocks in the new reservation
0198  * @rs: existing reservation to compare against
0199  *
0200  * returns: 1 if the block range is beyond the reach of the reservation
0201  *         -1 if the block range is before the start of the reservation
0202  *          0 if the block range overlaps with the reservation
0203  */
0204 static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs)
0205 {
0206     if (start >= rs->rs_start + rs->rs_requested)
0207         return 1;
0208     if (rs->rs_start >= start + len)
0209         return -1;
0210     return 0;
0211 }
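
/*
 * Example (annotation, not part of rgrp.c): for a reservation covering
 * blocks [100, 110), rs_cmp(110, 4, rs) returns 1 (entirely beyond),
 * rs_cmp(90, 10, rs) returns -1 (entirely before), and
 * rs_cmp(95, 10, rs) returns 0 (the ranges overlap).
 */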
0212 
0213 /**
0214  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
0215  *       a block in a given allocation state.
0216  * @buf: the buffer that holds the bitmaps
0217  * @len: the length (in bytes) of the buffer
0218  * @goal: start search at this block's bit-pair (within @buf)
0219  * @state: GFS2_BLKST_XXX the state of the block we're looking for.
0220  *
0221  * Scope of @goal and returned block number is only within this bitmap buffer,
0222  * not entire rgrp or filesystem.  @buf will be offset from the actual
0223  * beginning of a bitmap block buffer, skipping any header structures, but
0224  * headers are always a multiple of 64 bits long so that the buffer is
0225  * always aligned to a 64 bit boundary.
0226  *
0227  * The size of the buffer is in bytes, but it is assumed that it is
0228  * always ok to read a complete multiple of 64 bits at the end
0229  * of the block in case the end is not aligned to a natural boundary.
0230  *
0231  * Return: the block number (bitmap buffer scope) that was found
0232  */
0233 
0234 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
0235                u32 goal, u8 state)
0236 {
0237     u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
0238     const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
0239     const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
0240     u64 tmp;
0241     u64 mask = 0x5555555555555555ULL;
0242     u32 bit;
0243 
0244     /* Mask off bits we don't care about at the start of the search */
0245     mask <<= spoint;
0246     tmp = gfs2_bit_search(ptr, mask, state);
0247     ptr++;
0248     while(tmp == 0 && ptr < end) {
0249         tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
0250         ptr++;
0251     }
0252     /* Mask off any bits which are more than len bytes from the start */
0253     if (ptr == end && (len & (sizeof(u64) - 1)))
0254         tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
0255     /* Didn't find anything, so return */
0256     if (tmp == 0)
0257         return BFITNOENT;
0258     ptr--;
0259     bit = __ffs64(tmp);
0260     bit /= 2;   /* two bits per entry in the bitmap */
0261     return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
0262 }
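
/*
 * Annotation (not part of rgrp.c): a hypothetical userspace caller
 * showing the gfs2_bitfit() contract - @goal and the return value are
 * bitmap-buffer relative, and BFITNOENT ((u32)~0) means "no hit".
 * "bitfit" here stands for any scanner with the same signature.
 */
#include <stdint.h>

static unsigned int example_count_state(const uint8_t *buf, unsigned int len,
                                        uint8_t state,
                                        uint32_t (*bitfit)(const uint8_t *,
                                                           unsigned int,
                                                           uint32_t, uint8_t))
{
    unsigned int count = 0;
    uint32_t goal = 0;

    while (goal < len * 4 /* GFS2_NBBY */) {
        goal = bitfit(buf, len, goal, state);
        if (goal == (uint32_t)~0)   /* BFITNOENT */
            break;
        count++;
        goal++;                     /* resume after the hit */
    }
    return count;
}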
0263 
0264 /**
0265  * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
0266  * @rbm: The rbm with rgd already set correctly
0267  * @block: The block number (filesystem relative)
0268  *
0269  * This sets the bi and offset members of an rbm based on a
0270  * resource group and a filesystem relative block number. The
0271  * resource group must be set in the rbm on entry, the bi and
0272  * offset members will be set by this function.
0273  *
0274  * Returns: 0 on success, or an error code
0275  */
0276 
0277 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
0278 {
0279     if (!rgrp_contains_block(rbm->rgd, block))
0280         return -E2BIG;
0281     rbm->bii = 0;
0282     rbm->offset = block - rbm->rgd->rd_data0;
0283     /* Check if the block is within the first bitmap block */
0284     if (rbm->offset < rbm_bi(rbm)->bi_blocks)
0285         return 0;
0286 
0287     /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
0288     rbm->offset += (sizeof(struct gfs2_rgrp) -
0289             sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
0290     rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
0291     rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
0292     return 0;
0293 }
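
/*
 * Annotation (not part of rgrp.c): the bias added above makes the
 * arithmetic uniform. A full bitmap block holds
 * (block size - sizeof(struct gfs2_meta_header)) * GFS2_NBBY blocks,
 * which is how sd_blocks_per_bitmap is sized, but the first bitmap sits
 * behind the larger struct gfs2_rgrp header and so holds fewer.
 * Pretending the offset is
 * (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) * GFS2_NBBY
 * blocks further along lets a single divide and subtract find both the
 * bitmap index and the offset within it.
 */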
0294 
0295 /**
0296  * gfs2_rbm_add - add a number of blocks to an rbm
0297  * @rbm: The rbm with rgd already set correctly
0298  * @blocks: The number of blocks to add to @rbm
0299  *
0300  * This function takes an existing rbm structure and adds a number of blocks to
0301  * it.
0302  *
0303  * Returns: True if the new rbm would point past the end of the rgrp.
0304  */
0305 
0306 static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks)
0307 {
0308     struct gfs2_rgrpd *rgd = rbm->rgd;
0309     struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii;
0310 
0311     if (rbm->offset + blocks < bi->bi_blocks) {
0312         rbm->offset += blocks;
0313         return false;
0314     }
0315     blocks -= bi->bi_blocks - rbm->offset;
0316 
0317     for(;;) {
0318         bi++;
0319         if (bi == rgd->rd_bits + rgd->rd_length)
0320             return true;
0321         if (blocks < bi->bi_blocks) {
0322             rbm->offset = blocks;
0323             rbm->bii = bi - rgd->rd_bits;
0324             return false;
0325         }
0326         blocks -= bi->bi_blocks;
0327     }
0328 }
0329 
0330 /**
0331  * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
0332  * @rbm: Position to search (value/result)
0333  * @n_unaligned: Number of unaligned blocks to check
0334  * @len: Decremented for each block found (terminate on zero)
0335  *
0336  * Returns: true if a non-free block is encountered or the end of the resource
0337  *      group is reached.
0338  */
0339 
0340 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
0341 {
0342     u32 n;
0343     u8 res;
0344 
0345     for (n = 0; n < n_unaligned; n++) {
0346         res = gfs2_testbit(rbm, true);
0347         if (res != GFS2_BLKST_FREE)
0348             return true;
0349         (*len)--;
0350         if (*len == 0)
0351             return true;
0352         if (gfs2_rbm_add(rbm, 1))
0353             return true;
0354     }
0355 
0356     return false;
0357 }
0358 
0359 /**
0360  * gfs2_free_extlen - Return extent length of free blocks
0361  * @rrbm: Starting position
0362  * @len: Max length to check
0363  *
0364  * Starting at the block specified by the rbm, see how many free blocks
0365  * there are, not reading more than len blocks ahead. This can be done
0366  * using memchr_inv when the blocks are byte aligned, but has to be done
0367  * on a block by block basis in case of unaligned blocks. Also this
0368  * function can cope with bitmap boundaries (although it must stop on
0369  * a resource group boundary).
0370  *
0371  * Returns: Number of free blocks in the extent
0372  */
0373 
0374 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
0375 {
0376     struct gfs2_rbm rbm = *rrbm;
0377     u32 n_unaligned = rbm.offset & 3;
0378     u32 size = len;
0379     u32 bytes;
0380     u32 chunk_size;
0381     u8 *ptr, *start, *end;
0382     u64 block;
0383     struct gfs2_bitmap *bi;
0384 
0385     if (n_unaligned &&
0386         gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
0387         goto out;
0388 
0389     n_unaligned = len & 3;
0390     /* Start is now byte aligned */
0391     while (len > 3) {
0392         bi = rbm_bi(&rbm);
0393         start = bi->bi_bh->b_data;
0394         if (bi->bi_clone)
0395             start = bi->bi_clone;
0396         start += bi->bi_offset;
0397         end = start + bi->bi_bytes;
0398         BUG_ON(rbm.offset & 3);
0399         start += (rbm.offset / GFS2_NBBY);
0400         bytes = min_t(u32, len / GFS2_NBBY, (end - start));
0401         ptr = memchr_inv(start, 0, bytes);
0402         chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
0403         chunk_size *= GFS2_NBBY;
0404         BUG_ON(len < chunk_size);
0405         len -= chunk_size;
0406         block = gfs2_rbm_to_block(&rbm);
0407         if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
0408             n_unaligned = 0;
0409             break;
0410         }
0411         if (ptr) {
0412             n_unaligned = 3;
0413             break;
0414         }
0415         n_unaligned = len & 3;
0416     }
0417 
0418     /* Deal with any bits left over at the end */
0419     if (n_unaligned)
0420         gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
0421 out:
0422     return size - len;
0423 }
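
/*
 * Annotation (not part of rgrp.c): the fast path above relies on a
 * zero bitmap byte meaning four consecutive free blocks. memchr_inv()
 * is kernel-only; a plain C sketch of the same byte-aligned scan:
 */
#include <stdint.h>
#include <stddef.h>

static size_t example_free_run(const uint8_t *p, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++)
        if (p[i] != 0)  /* first byte containing a non-free block */
            break;
    return i * 4;       /* at least this many blocks are free */
}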
0424 
0425 /**
0426  * gfs2_bitcount - count the number of bits in a certain state
0427  * @rgd: the resource group descriptor
0428  * @buffer: the buffer that holds the bitmaps
0429  * @buflen: the length (in bytes) of the buffer
0430  * @state: the state of the block we're looking for
0431  *
0432  * Returns: The number of bits
0433  */
0434 
0435 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
0436              unsigned int buflen, u8 state)
0437 {
0438     const u8 *byte = buffer;
0439     const u8 *end = buffer + buflen;
0440     const u8 state1 = state << 2;
0441     const u8 state2 = state << 4;
0442     const u8 state3 = state << 6;
0443     u32 count = 0;
0444 
0445     for (; byte < end; byte++) {
0446         if (((*byte) & 0x03) == state)
0447             count++;
0448         if (((*byte) & 0x0C) == state1)
0449             count++;
0450         if (((*byte) & 0x30) == state2)
0451             count++;
0452         if (((*byte) & 0xC0) == state3)
0453             count++;
0454     }
0455 
0456     return count;
0457 }
0458 
0459 /**
0460  * gfs2_rgrp_verify - Verify that a resource group is consistent
0461  * @rgd: the rgrp
0462  *
0463  */
0464 
0465 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
0466 {
0467     struct gfs2_sbd *sdp = rgd->rd_sbd;
0468     struct gfs2_bitmap *bi = NULL;
0469     u32 length = rgd->rd_length;
0470     u32 count[4], tmp;
0471     int buf, x;
0472 
0473     memset(count, 0, 4 * sizeof(u32));
0474 
0475     /* Count # blocks in each of 4 possible allocation states */
0476     for (buf = 0; buf < length; buf++) {
0477         bi = rgd->rd_bits + buf;
0478         for (x = 0; x < 4; x++)
0479             count[x] += gfs2_bitcount(rgd,
0480                           bi->bi_bh->b_data +
0481                           bi->bi_offset,
0482                           bi->bi_bytes, x);
0483     }
0484 
0485     if (count[0] != rgd->rd_free) {
0486         gfs2_lm(sdp, "free data mismatch:  %u != %u\n",
0487             count[0], rgd->rd_free);
0488         gfs2_consist_rgrpd(rgd);
0489         return;
0490     }
0491 
0492     tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
0493     if (count[1] != tmp) {
0494         gfs2_lm(sdp, "used data mismatch:  %u != %u\n",
0495             count[1], tmp);
0496         gfs2_consist_rgrpd(rgd);
0497         return;
0498     }
0499 
0500     if (count[2] + count[3] != rgd->rd_dinodes) {
0501         gfs2_lm(sdp, "used metadata mismatch:  %u != %u\n",
0502             count[2] + count[3], rgd->rd_dinodes);
0503         gfs2_consist_rgrpd(rgd);
0504         return;
0505     }
0506 }
0507 
0508 /**
0509  * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
0510  * @sdp: The GFS2 superblock
0511  * @blk: The data block number
0512  * @exact: True if this needs to be an exact match
0513  *
0514  * The @exact argument should be set to true by most callers. The exception
0515  * is when we need to match blocks which are not represented by the rgrp
0516  * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
0517  * there for alignment purposes. Another way of looking at it is that @exact
0518  * matches only valid data/metadata blocks, but with @exact false, it will
0519  * match any block within the extent of the rgrp.
0520  *
0521  * Returns: The resource group, or NULL if not found
0522  */
0523 
0524 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
0525 {
0526     struct rb_node *n, *next;
0527     struct gfs2_rgrpd *cur;
0528 
0529     spin_lock(&sdp->sd_rindex_spin);
0530     n = sdp->sd_rindex_tree.rb_node;
0531     while (n) {
0532         cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
0533         next = NULL;
0534         if (blk < cur->rd_addr)
0535             next = n->rb_left;
0536         else if (blk >= cur->rd_data0 + cur->rd_data)
0537             next = n->rb_right;
0538         if (next == NULL) {
0539             spin_unlock(&sdp->sd_rindex_spin);
0540             if (exact) {
0541                 if (blk < cur->rd_addr)
0542                     return NULL;
0543                 if (blk >= cur->rd_data0 + cur->rd_data)
0544                     return NULL;
0545             }
0546             return cur;
0547         }
0548         n = next;
0549     }
0550     spin_unlock(&sdp->sd_rindex_spin);
0551 
0552     return NULL;
0553 }
0554 
0555 /**
0556  * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
0557  * @sdp: The GFS2 superblock
0558  *
0559  * Returns: The first rgrp in the filesystem
0560  */
0561 
0562 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
0563 {
0564     const struct rb_node *n;
0565     struct gfs2_rgrpd *rgd;
0566 
0567     spin_lock(&sdp->sd_rindex_spin);
0568     n = rb_first(&sdp->sd_rindex_tree);
0569     rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
0570     spin_unlock(&sdp->sd_rindex_spin);
0571 
0572     return rgd;
0573 }
0574 
0575 /**
0576  * gfs2_rgrpd_get_next - get the next RG
0577  * @rgd: the resource group descriptor
0578  *
0579  * Returns: The next rgrp
0580  */
0581 
0582 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
0583 {
0584     struct gfs2_sbd *sdp = rgd->rd_sbd;
0585     const struct rb_node *n;
0586 
0587     spin_lock(&sdp->sd_rindex_spin);
0588     n = rb_next(&rgd->rd_node);
0589     if (n == NULL)
0590         n = rb_first(&sdp->sd_rindex_tree);
0591 
0592     if (unlikely(&rgd->rd_node == n)) {
0593         spin_unlock(&sdp->sd_rindex_spin);
0594         return NULL;
0595     }
0596     rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
0597     spin_unlock(&sdp->sd_rindex_spin);
0598     return rgd;
0599 }
0600 
0601 void check_and_update_goal(struct gfs2_inode *ip)
0602 {
0603     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
0604     if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
0605         ip->i_goal = ip->i_no_addr;
0606 }
0607 
0608 void gfs2_free_clones(struct gfs2_rgrpd *rgd)
0609 {
0610     int x;
0611 
0612     for (x = 0; x < rgd->rd_length; x++) {
0613         struct gfs2_bitmap *bi = rgd->rd_bits + x;
0614         kfree(bi->bi_clone);
0615         bi->bi_clone = NULL;
0616     }
0617 }
0618 
0619 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
0620             const char *fs_id_buf)
0621 {
0622     struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
0623 
0624     gfs2_print_dbg(seq, "%s  B: n:%llu s:%llu f:%u\n",
0625                fs_id_buf,
0626                (unsigned long long)ip->i_no_addr,
0627                (unsigned long long)rs->rs_start,
0628                rs->rs_requested);
0629 }
0630 
0631 /**
0632  * __rs_deltree - remove a multi-block reservation from the rgd tree
0633  * @rs: The reservation to remove
0634  *
0635  */
0636 static void __rs_deltree(struct gfs2_blkreserv *rs)
0637 {
0638     struct gfs2_rgrpd *rgd;
0639 
0640     if (!gfs2_rs_active(rs))
0641         return;
0642 
0643     rgd = rs->rs_rgd;
0644     trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
0645     rb_erase(&rs->rs_node, &rgd->rd_rstree);
0646     RB_CLEAR_NODE(&rs->rs_node);
0647 
0648     if (rs->rs_requested) {
0649         /* return requested blocks to the rgrp */
0650         BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested);
0651         rs->rs_rgd->rd_requested -= rs->rs_requested;
0652 
0653         /* The rgrp extent failure point is likely not to increase;
0654            it will only do so if the freed blocks are somehow
0655            contiguous with a span of free blocks that follows. Still,
0656            it will force the number to be recalculated later. */
0657         rgd->rd_extfail_pt += rs->rs_requested;
0658         rs->rs_requested = 0;
0659     }
0660 }
0661 
0662 /**
0663  * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
0664  * @rs: The reservation to remove
0665  *
0666  */
0667 void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
0668 {
0669     struct gfs2_rgrpd *rgd;
0670 
0671     rgd = rs->rs_rgd;
0672     if (rgd) {
0673         spin_lock(&rgd->rd_rsspin);
0674         __rs_deltree(rs);
0675         BUG_ON(rs->rs_requested);
0676         spin_unlock(&rgd->rd_rsspin);
0677     }
0678 }
0679 
0680 /**
0681  * gfs2_rs_delete - delete a multi-block reservation
0682  * @ip: The inode for this reservation
0683  *
0684  */
0685 void gfs2_rs_delete(struct gfs2_inode *ip)
0686 {
0687     struct inode *inode = &ip->i_inode;
0688 
0689     down_write(&ip->i_rw_mutex);
0690     if (atomic_read(&inode->i_writecount) <= 1)
0691         gfs2_rs_deltree(&ip->i_res);
0692     up_write(&ip->i_rw_mutex);
0693 }
0694 
0695 /**
0696  * return_all_reservations - return all reserved blocks back to the rgrp.
0697  * @rgd: the rgrp that needs its space back
0698  *
0699  * We previously reserved a bunch of blocks for allocation. Now we need to
0700  * give them back. This leaves the reservation structures intact, but removes
0701  * all of their corresponding "no-fly zones".
0702  */
0703 static void return_all_reservations(struct gfs2_rgrpd *rgd)
0704 {
0705     struct rb_node *n;
0706     struct gfs2_blkreserv *rs;
0707 
0708     spin_lock(&rgd->rd_rsspin);
0709     while ((n = rb_first(&rgd->rd_rstree))) {
0710         rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
0711         __rs_deltree(rs);
0712     }
0713     spin_unlock(&rgd->rd_rsspin);
0714 }
0715 
0716 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
0717 {
0718     struct rb_node *n;
0719     struct gfs2_rgrpd *rgd;
0720     struct gfs2_glock *gl;
0721 
0722     while ((n = rb_first(&sdp->sd_rindex_tree))) {
0723         rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
0724         gl = rgd->rd_gl;
0725 
0726         rb_erase(n, &sdp->sd_rindex_tree);
0727 
0728         if (gl) {
0729             if (gl->gl_state != LM_ST_UNLOCKED) {
0730                 gfs2_glock_cb(gl, LM_ST_UNLOCKED);
0731                 flush_delayed_work(&gl->gl_work);
0732             }
0733             gfs2_rgrp_brelse(rgd);
0734             glock_clear_object(gl, rgd);
0735             gfs2_glock_put(gl);
0736         }
0737 
0738         gfs2_free_clones(rgd);
0739         return_all_reservations(rgd);
0740         kfree(rgd->rd_bits);
0741         rgd->rd_bits = NULL;
0742         kmem_cache_free(gfs2_rgrpd_cachep, rgd);
0743     }
0744 }
0745 
0746 /**
0747  * compute_bitstructs - Compute the bitmap sizes
0748  * @rgd: The resource group descriptor
0749  *
0750  * Calculates bitmap descriptors, one for each block that contains bitmap data
0751  *
0752  * Returns: errno
0753  */
0754 
0755 static int compute_bitstructs(struct gfs2_rgrpd *rgd)
0756 {
0757     struct gfs2_sbd *sdp = rgd->rd_sbd;
0758     struct gfs2_bitmap *bi;
0759     u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
0760     u32 bytes_left, bytes;
0761     int x;
0762 
0763     if (!length)
0764         return -EINVAL;
0765 
0766     rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
0767     if (!rgd->rd_bits)
0768         return -ENOMEM;
0769 
0770     bytes_left = rgd->rd_bitbytes;
0771 
0772     for (x = 0; x < length; x++) {
0773         bi = rgd->rd_bits + x;
0774 
0775         bi->bi_flags = 0;
0776         /* small rgrp; bitmap stored completely in header block */
0777         if (length == 1) {
0778             bytes = bytes_left;
0779             bi->bi_offset = sizeof(struct gfs2_rgrp);
0780             bi->bi_start = 0;
0781             bi->bi_bytes = bytes;
0782             bi->bi_blocks = bytes * GFS2_NBBY;
0783         /* header block */
0784         } else if (x == 0) {
0785             bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
0786             bi->bi_offset = sizeof(struct gfs2_rgrp);
0787             bi->bi_start = 0;
0788             bi->bi_bytes = bytes;
0789             bi->bi_blocks = bytes * GFS2_NBBY;
0790         /* last block */
0791         } else if (x + 1 == length) {
0792             bytes = bytes_left;
0793             bi->bi_offset = sizeof(struct gfs2_meta_header);
0794             bi->bi_start = rgd->rd_bitbytes - bytes_left;
0795             bi->bi_bytes = bytes;
0796             bi->bi_blocks = bytes * GFS2_NBBY;
0797         /* other blocks */
0798         } else {
0799             bytes = sdp->sd_sb.sb_bsize -
0800                 sizeof(struct gfs2_meta_header);
0801             bi->bi_offset = sizeof(struct gfs2_meta_header);
0802             bi->bi_start = rgd->rd_bitbytes - bytes_left;
0803             bi->bi_bytes = bytes;
0804             bi->bi_blocks = bytes * GFS2_NBBY;
0805         }
0806 
0807         bytes_left -= bytes;
0808     }
0809 
0810     if (bytes_left) {
0811         gfs2_consist_rgrpd(rgd);
0812         return -EIO;
0813     }
0814     bi = rgd->rd_bits + (length - 1);
0815     if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
0816         gfs2_lm(sdp,
0817             "ri_addr = %llu\n"
0818             "ri_length = %u\n"
0819             "ri_data0 = %llu\n"
0820             "ri_data = %u\n"
0821             "ri_bitbytes = %u\n"
0822             "start=%u len=%u offset=%u\n",
0823             (unsigned long long)rgd->rd_addr,
0824             rgd->rd_length,
0825             (unsigned long long)rgd->rd_data0,
0826             rgd->rd_data,
0827             rgd->rd_bitbytes,
0828             bi->bi_start, bi->bi_bytes, bi->bi_offset);
0829         gfs2_consist_rgrpd(rgd);
0830         return -EIO;
0831     }
0832 
0833     return 0;
0834 }
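
/*
 * Annotation (not part of rgrp.c): a hypothetical sketch of the sizing
 * rule that compute_bitstructs() checks. Header sizes are passed in
 * rather than asserted here; GFS2_NBBY packs four blocks per bitmap
 * byte.
 */
#include <stdint.h>

static uint32_t example_bitmap_blocks(uint32_t bsize, uint32_t rgrp_hdr,
                                      uint32_t meta_hdr, uint32_t data_blocks)
{
    uint32_t bitbytes = (data_blocks + 3) / 4;  /* rd_bitbytes */
    uint32_t first = bsize - rgrp_hdr;   /* share of the header block */
    uint32_t later = bsize - meta_hdr;   /* share of each later block */

    if (bitbytes <= first)
        return 1;                        /* small rgrp, header block only */
    return 1 + (bitbytes - first + later - 1) / later;  /* rd_length */
}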
0835 
0836 /**
0837  * gfs2_ri_total - Total up the file system space, according to the rindex.
0838  * @sdp: the filesystem
0839  *
0840  */
0841 u64 gfs2_ri_total(struct gfs2_sbd *sdp)
0842 {
0843     u64 total_data = 0; 
0844     struct inode *inode = sdp->sd_rindex;
0845     struct gfs2_inode *ip = GFS2_I(inode);
0846     char buf[sizeof(struct gfs2_rindex)];
0847     int error, rgrps;
0848 
0849     for (rgrps = 0;; rgrps++) {
0850         loff_t pos = rgrps * sizeof(struct gfs2_rindex);
0851 
0852         if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
0853             break;
0854         error = gfs2_internal_read(ip, buf, &pos,
0855                        sizeof(struct gfs2_rindex));
0856         if (error != sizeof(struct gfs2_rindex))
0857             break;
0858         total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
0859     }
0860     return total_data;
0861 }
0862 
0863 static int rgd_insert(struct gfs2_rgrpd *rgd)
0864 {
0865     struct gfs2_sbd *sdp = rgd->rd_sbd;
0866     struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
0867 
0868     /* Figure out where to put new node */
0869     while (*newn) {
0870         struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
0871                           rd_node);
0872 
0873         parent = *newn;
0874         if (rgd->rd_addr < cur->rd_addr)
0875             newn = &((*newn)->rb_left);
0876         else if (rgd->rd_addr > cur->rd_addr)
0877             newn = &((*newn)->rb_right);
0878         else
0879             return -EEXIST;
0880     }
0881 
0882     rb_link_node(&rgd->rd_node, parent, newn);
0883     rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
0884     sdp->sd_rgrps++;
0885     return 0;
0886 }
0887 
0888 /**
0889  * read_rindex_entry - Pull in a new resource index entry from the disk
0890  * @ip: Pointer to the rindex inode
0891  *
0892  * Returns: 0 on success, > 0 on EOF, error code otherwise
0893  */
0894 
0895 static int read_rindex_entry(struct gfs2_inode *ip)
0896 {
0897     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
0898     loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
0899     struct gfs2_rindex buf;
0900     int error;
0901     struct gfs2_rgrpd *rgd;
0902 
0903     if (pos >= i_size_read(&ip->i_inode))
0904         return 1;
0905 
0906     error = gfs2_internal_read(ip, (char *)&buf, &pos,
0907                    sizeof(struct gfs2_rindex));
0908 
0909     if (error != sizeof(struct gfs2_rindex))
0910         return (error == 0) ? 1 : error;
0911 
0912     rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
0913     error = -ENOMEM;
0914     if (!rgd)
0915         return error;
0916 
0917     rgd->rd_sbd = sdp;
0918     rgd->rd_addr = be64_to_cpu(buf.ri_addr);
0919     rgd->rd_length = be32_to_cpu(buf.ri_length);
0920     rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
0921     rgd->rd_data = be32_to_cpu(buf.ri_data);
0922     rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
0923     spin_lock_init(&rgd->rd_rsspin);
0924     mutex_init(&rgd->rd_mutex);
0925 
0926     error = gfs2_glock_get(sdp, rgd->rd_addr,
0927                    &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
0928     if (error)
0929         goto fail;
0930 
0931     error = compute_bitstructs(rgd);
0932     if (error)
0933         goto fail_glock;
0934 
0935     rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
0936     rgd->rd_flags &= ~GFS2_RDF_PREFERRED;
0937     if (rgd->rd_data > sdp->sd_max_rg_data)
0938         sdp->sd_max_rg_data = rgd->rd_data;
0939     spin_lock(&sdp->sd_rindex_spin);
0940     error = rgd_insert(rgd);
0941     spin_unlock(&sdp->sd_rindex_spin);
0942     if (!error) {
0943         glock_set_object(rgd->rd_gl, rgd);
0944         return 0;
0945     }
0946 
0947     error = 0; /* someone else read in the rgrp; free it and ignore it */
0948 fail_glock:
0949     gfs2_glock_put(rgd->rd_gl);
0950 
0951 fail:
0952     kfree(rgd->rd_bits);
0953     rgd->rd_bits = NULL;
0954     kmem_cache_free(gfs2_rgrpd_cachep, rgd);
0955     return error;
0956 }
0957 
0958 /**
0959  * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
0960  * @sdp: the GFS2 superblock
0961  *
0962  * The purpose of this function is to select a subset of the resource groups
0963  * and mark them as PREFERRED. We do it in such a way that each node prefers
0964  * to use a unique set of rgrps to minimize glock contention.
0965  */
0966 static void set_rgrp_preferences(struct gfs2_sbd *sdp)
0967 {
0968     struct gfs2_rgrpd *rgd, *first;
0969     int i;
0970 
0971     /* Skip an initial number of rgrps, based on this node's journal ID.
0972        That should start each node out on its own set. */
0973     rgd = gfs2_rgrpd_get_first(sdp);
0974     for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
0975         rgd = gfs2_rgrpd_get_next(rgd);
0976     first = rgd;
0977 
0978     do {
0979         rgd->rd_flags |= GFS2_RDF_PREFERRED;
0980         for (i = 0; i < sdp->sd_journals; i++) {
0981             rgd = gfs2_rgrpd_get_next(rgd);
0982             if (!rgd || rgd == first)
0983                 break;
0984         }
0985     } while (rgd && rgd != first);
0986 }
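
/*
 * Example (annotation, not part of rgrp.c): with three journals and
 * nine rgrps, the node with journal ID 1 skips one rgrp and then marks
 * every third one, preferring rgrps {1, 4, 7}, while journal IDs 0 and
 * 2 prefer {0, 3, 6} and {2, 5, 8} respectively, so the nodes mostly
 * allocate from disjoint sets of rgrps.
 */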
0987 
0988 /**
0989  * gfs2_ri_update - Pull in a new resource index from the disk
0990  * @ip: pointer to the rindex inode
0991  *
0992  * Returns: 0 on successful update, error code otherwise
0993  */
0994 
0995 static int gfs2_ri_update(struct gfs2_inode *ip)
0996 {
0997     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
0998     int error;
0999 
1000     do {
1001         error = read_rindex_entry(ip);
1002     } while (error == 0);
1003 
1004     if (error < 0)
1005         return error;
1006 
1007     if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
1008         fs_err(sdp, "no resource groups found in the file system.\n");
1009         return -ENOENT;
1010     }
1011     set_rgrp_preferences(sdp);
1012 
1013     sdp->sd_rindex_uptodate = 1;
1014     return 0;
1015 }
1016 
1017 /**
1018  * gfs2_rindex_update - Update the rindex if required
1019  * @sdp: The GFS2 superblock
1020  *
1021  * We grab a lock on the rindex inode to make sure that it doesn't
1022  * change whilst we are performing an operation. We keep this lock
1023  * for quite long periods of time compared to other locks. This
1024  * doesn't matter, since it is shared and it is very, very rarely
1025  * accessed in the exclusive mode (i.e. only when expanding the filesystem).
1026  *
1027  * This makes sure that we're using the latest copy of the resource index
1028  * special file, which might have been updated if someone expanded the
1029  * filesystem (via gfs2_grow utility), which adds new resource groups.
1030  *
1031  * Returns: 0 on success, error code otherwise
1032  */
1033 
1034 int gfs2_rindex_update(struct gfs2_sbd *sdp)
1035 {
1036     struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1037     struct gfs2_glock *gl = ip->i_gl;
1038     struct gfs2_holder ri_gh;
1039     int error = 0;
1040     int unlock_required = 0;
1041 
1042     /* Read new copy from disk if we don't have the latest */
1043     if (!sdp->sd_rindex_uptodate) {
1044         if (!gfs2_glock_is_locked_by_me(gl)) {
1045             error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1046             if (error)
1047                 return error;
1048             unlock_required = 1;
1049         }
1050         if (!sdp->sd_rindex_uptodate)
1051             error = gfs2_ri_update(ip);
1052         if (unlock_required)
1053             gfs2_glock_dq_uninit(&ri_gh);
1054     }
1055 
1056     return error;
1057 }
1058 
1059 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1060 {
1061     const struct gfs2_rgrp *str = buf;
1062     u32 rg_flags;
1063 
1064     rg_flags = be32_to_cpu(str->rg_flags);
1065     rg_flags &= ~GFS2_RDF_MASK;
1066     rgd->rd_flags &= GFS2_RDF_MASK;
1067     rgd->rd_flags |= rg_flags;
1068     rgd->rd_free = be32_to_cpu(str->rg_free);
1069     rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1070     rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1071     /* rd_data0, rd_data and rd_bitbytes already set from rindex */
1072 }
1073 
1074 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1075 {
1076     const struct gfs2_rgrp *str = buf;
1077 
1078     rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1079     rgl->rl_flags = str->rg_flags;
1080     rgl->rl_free = str->rg_free;
1081     rgl->rl_dinodes = str->rg_dinodes;
1082     rgl->rl_igeneration = str->rg_igeneration;
1083     rgl->__pad = 0UL;
1084 }
1085 
1086 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1087 {
1088     struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
1089     struct gfs2_rgrp *str = buf;
1090     u32 crc;
1091 
1092     str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1093     str->rg_free = cpu_to_be32(rgd->rd_free);
1094     str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1095     if (next == NULL)
1096         str->rg_skip = 0;
1097     else if (next->rd_addr > rgd->rd_addr)
1098         str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
1099     str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1100     str->rg_data0 = cpu_to_be64(rgd->rd_data0);
1101     str->rg_data = cpu_to_be32(rgd->rd_data);
1102     str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
1103     str->rg_crc = 0;
1104     crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
1105     str->rg_crc = cpu_to_be32(crc);
1106 
1107     memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1108     gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
1109 }
1110 
1111 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1112 {
1113     struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1114     struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1115     struct gfs2_sbd *sdp = rgd->rd_sbd;
1116     int valid = 1;
1117 
1118     if (rgl->rl_flags != str->rg_flags) {
1119         fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u",
1120             (unsigned long long)rgd->rd_addr,
1121                be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
1122         valid = 0;
1123     }
1124     if (rgl->rl_free != str->rg_free) {
1125         fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u",
1126             (unsigned long long)rgd->rd_addr,
1127             be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
1128         valid = 0;
1129     }
1130     if (rgl->rl_dinodes != str->rg_dinodes) {
1131         fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
1132             (unsigned long long)rgd->rd_addr,
1133             be32_to_cpu(rgl->rl_dinodes),
1134             be32_to_cpu(str->rg_dinodes));
1135         valid = 0;
1136     }
1137     if (rgl->rl_igeneration != str->rg_igeneration) {
1138         fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu",
1139             (unsigned long long)rgd->rd_addr,
1140             (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
1141             (unsigned long long)be64_to_cpu(str->rg_igeneration));
1142         valid = 0;
1143     }
1144     return valid;
1145 }
1146 
1147 static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1148 {
1149     struct gfs2_bitmap *bi;
1150     const u32 length = rgd->rd_length;
1151     const u8 *buffer = NULL;
1152     u32 i, goal, count = 0;
1153 
1154     for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1155         goal = 0;
1156         buffer = bi->bi_bh->b_data + bi->bi_offset;
1157         WARN_ON(!buffer_uptodate(bi->bi_bh));
1158         while (goal < bi->bi_blocks) {
1159             goal = gfs2_bitfit(buffer, bi->bi_bytes, goal,
1160                        GFS2_BLKST_UNLINKED);
1161             if (goal == BFITNOENT)
1162                 break;
1163             count++;
1164             goal++;
1165         }
1166     }
1167 
1168     return count;
1169 }
1170 
1171 static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd)
1172 {
1173     struct gfs2_bitmap *bi;
1174     int x;
1175 
1176     if (rgd->rd_free) {
1177         for (x = 0; x < rgd->rd_length; x++) {
1178             bi = rgd->rd_bits + x;
1179             clear_bit(GBF_FULL, &bi->bi_flags);
1180         }
1181     } else {
1182         for (x = 0; x < rgd->rd_length; x++) {
1183             bi = rgd->rd_bits + x;
1184             set_bit(GBF_FULL, &bi->bi_flags);
1185         }
1186     }
1187 }
1188 
1189 /**
1190  * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps
1191  * @gl: the glock protecting the rgrpd to read in
1192  *
1193  * Read in all of a Resource Group's header and bitmap blocks.
1194  * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
1195  *
1196  * Returns: errno
1197  */
1198 
1199 int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl)
1200 {
1201     struct gfs2_rgrpd *rgd = gl->gl_object;
1202     struct gfs2_sbd *sdp = rgd->rd_sbd;
1203     unsigned int length = rgd->rd_length;
1204     struct gfs2_bitmap *bi;
1205     unsigned int x, y;
1206     int error;
1207 
1208     if (rgd->rd_bits[0].bi_bh != NULL)
1209         return 0;
1210 
1211     for (x = 0; x < length; x++) {
1212         bi = rgd->rd_bits + x;
1213         error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
1214         if (error)
1215             goto fail;
1216     }
1217 
1218     for (y = length; y--;) {
1219         bi = rgd->rd_bits + y;
1220         error = gfs2_meta_wait(sdp, bi->bi_bh);
1221         if (error)
1222             goto fail;
1223         if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1224                           GFS2_METATYPE_RG)) {
1225             error = -EIO;
1226             goto fail;
1227         }
1228     }
1229 
1230     gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1231     rgrp_set_bitmap_flags(rgd);
1232     rgd->rd_flags |= GFS2_RDF_CHECK;
1233     rgd->rd_free_clone = rgd->rd_free;
1234     GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved);
1235     /* max out the rgrp allocation failure point */
1236     rgd->rd_extfail_pt = rgd->rd_free;
1237     if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1238         rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1239         gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1240                      rgd->rd_bits[0].bi_bh->b_data);
1241     } else if (sdp->sd_args.ar_rgrplvb) {
1242         if (!gfs2_rgrp_lvb_valid(rgd)) {
1243             gfs2_consist_rgrpd(rgd);
1244             error = -EIO;
1245             goto fail;
1246         }
1247         if (rgd->rd_rgl->rl_unlinked == 0)
1248             rgd->rd_flags &= ~GFS2_RDF_CHECK;
1249     }
1250     return 0;
1251 
1252 fail:
1253     while (x--) {
1254         bi = rgd->rd_bits + x;
1255         brelse(bi->bi_bh);
1256         bi->bi_bh = NULL;
1257         gfs2_assert_warn(sdp, !bi->bi_clone);
1258     }
1259     return error;
1260 }
1261 
1262 static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh)
1263 {
1264     u32 rl_flags;
1265 
1266     if (!test_bit(GLF_INSTANTIATE_NEEDED, &gh->gh_gl->gl_flags))
1267         return 0;
1268 
1269     if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1270         return gfs2_instantiate(gh);
1271 
1272     rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1273     rl_flags &= ~GFS2_RDF_MASK;
1274     rgd->rd_flags &= GFS2_RDF_MASK;
1275     rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK);
1276     if (rgd->rd_rgl->rl_unlinked == 0)
1277         rgd->rd_flags &= ~GFS2_RDF_CHECK;
1278     rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1279     rgrp_set_bitmap_flags(rgd);
1280     rgd->rd_free_clone = rgd->rd_free;
1281     GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved);
1282     /* max out the rgrp allocation failure point */
1283     rgd->rd_extfail_pt = rgd->rd_free;
1284     rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1285     rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1286     return 0;
1287 }
1288 
1289 /**
1290  * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_go_instantiate()
1291  * @rgd: The resource group
1292  *
1293  */
1294 
1295 void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
1296 {
1297     int x, length = rgd->rd_length;
1298 
1299     for (x = 0; x < length; x++) {
1300         struct gfs2_bitmap *bi = rgd->rd_bits + x;
1301         if (bi->bi_bh) {
1302             brelse(bi->bi_bh);
1303             bi->bi_bh = NULL;
1304         }
1305     }
1306     set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags);
1307 }
1308 
1309 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1310                  struct buffer_head *bh,
1311                  const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1312 {
1313     struct super_block *sb = sdp->sd_vfs;
1314     u64 blk;
1315     sector_t start = 0;
1316     sector_t nr_blks = 0;
1317     int rv = -EIO;
1318     unsigned int x;
1319     u32 trimmed = 0;
1320     u8 diff;
1321 
1322     for (x = 0; x < bi->bi_bytes; x++) {
1323         const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1324         clone += bi->bi_offset;
1325         clone += x;
1326         if (bh) {
1327             const u8 *orig = bh->b_data + bi->bi_offset + x;
1328             diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1329         } else {
1330             diff = ~(*clone | (*clone >> 1));
1331         }
1332         diff &= 0x55;
1333         if (diff == 0)
1334             continue;
1335         blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1336         while(diff) {
1337             if (diff & 1) {
1338                 if (nr_blks == 0)
1339                     goto start_new_extent;
1340                 if ((start + nr_blks) != blk) {
1341                     if (nr_blks >= minlen) {
1342                         rv = sb_issue_discard(sb,
1343                             start, nr_blks,
1344                             GFP_NOFS, 0);
1345                         if (rv)
1346                             goto fail;
1347                         trimmed += nr_blks;
1348                     }
1349                     nr_blks = 0;
1350 start_new_extent:
1351                     start = blk;
1352                 }
1353                 nr_blks++;
1354             }
1355             diff >>= 2;
1356             blk++;
1357         }
1358     }
1359     if (nr_blks >= minlen) {
1360         rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1361         if (rv)
1362             goto fail;
1363         trimmed += nr_blks;
1364     }
1365     if (ptrimmed)
1366         *ptrimmed = trimmed;
1367     return 0;
1368 
1369 fail:
1370     if (sdp->sd_args.ar_discard)
1371         fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
1372     sdp->sd_args.ar_discard = 0;
1373     return rv;
1374 }
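
/*
 * Worked example (annotation, not part of rgrp.c): each two-bit field
 * of @diff gets a 1 in its low bit when the block is free (00) in the
 * bitmap image but still allocated in the clone, i.e. it was freed
 * since the clone was taken. With orig = 0x04 (block 1 in use) and
 * clone = 0x44 (blocks 1 and 3 in use):
 *
 *   ~(0x04 | 0x04 >> 1)  = 0xf9
 *    (0x44 | 0x44 >> 1)  = 0x66
 *   0xf9 & 0x66 & 0x55   = 0x40  -> block 3 is newly free
 *
 * With no @bh, every block that is free in the clone (or, lacking a
 * clone, in the bitmap itself) qualifies for discard.
 */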
1375 
1376 /**
1377  * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1378  * @filp: Any file on the filesystem
1379  * @argp: Pointer to the arguments (also used to pass result)
1380  *
1381  * Returns: 0 on success, otherwise error code
1382  */
1383 
1384 int gfs2_fitrim(struct file *filp, void __user *argp)
1385 {
1386     struct inode *inode = file_inode(filp);
1387     struct gfs2_sbd *sdp = GFS2_SB(inode);
1388     struct block_device *bdev = sdp->sd_vfs->s_bdev;
1389     struct buffer_head *bh;
1390     struct gfs2_rgrpd *rgd;
1391     struct gfs2_rgrpd *rgd_end;
1392     struct gfs2_holder gh;
1393     struct fstrim_range r;
1394     int ret = 0;
1395     u64 amt;
1396     u64 trimmed = 0;
1397     u64 start, end, minlen;
1398     unsigned int x;
1399     unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1400 
1401     if (!capable(CAP_SYS_ADMIN))
1402         return -EPERM;
1403 
1404     if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
1405         return -EROFS;
1406 
1407     if (!bdev_max_discard_sectors(bdev))
1408         return -EOPNOTSUPP;
1409 
1410     if (copy_from_user(&r, argp, sizeof(r)))
1411         return -EFAULT;
1412 
1413     ret = gfs2_rindex_update(sdp);
1414     if (ret)
1415         return ret;
1416 
1417     start = r.start >> bs_shift;
1418     end = start + (r.len >> bs_shift);
1419     minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
1420     minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift;
1421 
1422     if (end <= start || minlen > sdp->sd_max_rg_data)
1423         return -EINVAL;
1424 
1425     rgd = gfs2_blk2rgrpd(sdp, start, 0);
1426     rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1427 
1428     if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1429         && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1430         return -EINVAL; /* start is beyond the end of the fs */
1431 
1432     while (1) {
1433 
1434         ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1435                      LM_FLAG_NODE_SCOPE, &gh);
1436         if (ret)
1437             goto out;
1438 
1439         if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1440             /* Trim each bitmap in the rgrp */
1441             for (x = 0; x < rgd->rd_length; x++) {
1442                 struct gfs2_bitmap *bi = rgd->rd_bits + x;
1443                 rgrp_lock_local(rgd);
1444                 ret = gfs2_rgrp_send_discards(sdp,
1445                         rgd->rd_data0, NULL, bi, minlen,
1446                         &amt);
1447                 rgrp_unlock_local(rgd);
1448                 if (ret) {
1449                     gfs2_glock_dq_uninit(&gh);
1450                     goto out;
1451                 }
1452                 trimmed += amt;
1453             }
1454 
1455             /* Mark rgrp as having been trimmed */
1456             ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1457             if (ret == 0) {
1458                 bh = rgd->rd_bits[0].bi_bh;
1459                 rgrp_lock_local(rgd);
1460                 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1461                 gfs2_trans_add_meta(rgd->rd_gl, bh);
1462                 gfs2_rgrp_out(rgd, bh->b_data);
1463                 rgrp_unlock_local(rgd);
1464                 gfs2_trans_end(sdp);
1465             }
1466         }
1467         gfs2_glock_dq_uninit(&gh);
1468 
1469         if (rgd == rgd_end)
1470             break;
1471 
1472         rgd = gfs2_rgrpd_get_next(rgd);
1473     }
1474 
1475 out:
1476     r.len = trimmed << bs_shift;
1477     if (copy_to_user(argp, &r, sizeof(r)))
1478         return -EFAULT;
1479 
1480     return ret;
1481 }
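
/*
 * Annotation (not part of rgrp.c): a minimal userspace caller for the
 * FITRIM ioctl handled above; any open file or directory on the
 * mounted GFS2 filesystem serves as the handle.
 */
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>   /* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
    struct fstrim_range r = {
        .start = 0,
        .len = ULLONG_MAX,  /* whole filesystem */
        .minlen = 0,        /* let the fs round this up */
    };
    int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY);

    if (fd < 0 || ioctl(fd, FITRIM, &r) < 0) {
        perror("FITRIM");
        return 1;
    }
    /* the kernel writes the number of trimmed bytes back into r.len */
    printf("trimmed %llu bytes\n", (unsigned long long)r.len);
    return 0;
}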
1482 
1483 /**
1484  * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1485  * @ip: the inode structure
1486  *
1487  */
1488 static void rs_insert(struct gfs2_inode *ip)
1489 {
1490     struct rb_node **newn, *parent = NULL;
1491     int rc;
1492     struct gfs2_blkreserv *rs = &ip->i_res;
1493     struct gfs2_rgrpd *rgd = rs->rs_rgd;
1494 
1495     BUG_ON(gfs2_rs_active(rs));
1496 
1497     spin_lock(&rgd->rd_rsspin);
1498     newn = &rgd->rd_rstree.rb_node;
1499     while (*newn) {
1500         struct gfs2_blkreserv *cur =
1501             rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1502 
1503         parent = *newn;
1504         rc = rs_cmp(rs->rs_start, rs->rs_requested, cur);
1505         if (rc > 0)
1506             newn = &((*newn)->rb_right);
1507         else if (rc < 0)
1508             newn = &((*newn)->rb_left);
1509         else {
1510             spin_unlock(&rgd->rd_rsspin);
1511             WARN_ON(1);
1512             return;
1513         }
1514     }
1515 
1516     rb_link_node(&rs->rs_node, parent, newn);
1517     rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1518 
1519     /* Do our rgrp accounting for the reservation */
1520     rgd->rd_requested += rs->rs_requested; /* blocks requested */
1521     spin_unlock(&rgd->rd_rsspin);
1522     trace_gfs2_rs(rs, TRACE_RS_INSERT);
1523 }
1524 
1525 /**
1526  * rgd_free - return the number of free blocks we can allocate
1527  * @rgd: the resource group
1528  * @rs: The reservation to free
1529  *
1530  * This function returns the number of free blocks for an rgrp.
1531  * That's the clone-free blocks (blocks that are free, not including those
1532  * still being used for unlinked files that haven't been deleted.)
1533  *
1534  * It also subtracts any blocks reserved by someone else, but does not
1535  * include free blocks that are still part of our current reservation,
1536  * because obviously we can (and will) allocate them.
1537  */
1538 static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
1539 {
1540     u32 tot_reserved, tot_free;
1541 
1542     if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested))
1543         return 0;
1544     tot_reserved = rgd->rd_requested - rs->rs_requested;
1545 
1546     if (rgd->rd_free_clone < tot_reserved)
1547         tot_reserved = 0;
1548 
1549     tot_free = rgd->rd_free_clone - tot_reserved;
1550 
1551     return tot_free;
1552 }
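
/*
 * Example (annotation, not part of rgrp.c): with 100 clone-free blocks
 * in the rgrp and 70 blocks requested by all reservations, of which 30
 * belong to @rs itself, rgd_free() returns 100 - (70 - 30) = 60: our
 * own reservation does not count against us, everyone else's does.
 */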
1553 
1554 /**
1555  * rg_mblk_search - find a group of multiple free blocks to form a reservation
1556  * @rgd: the resource group descriptor
1557  * @ip: pointer to the inode for which we're reserving blocks
1558  * @ap: the allocation parameters
1559  *
1560  */
1561 
1562 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1563                const struct gfs2_alloc_parms *ap)
1564 {
1565     struct gfs2_rbm rbm = { .rgd = rgd, };
1566     u64 goal;
1567     struct gfs2_blkreserv *rs = &ip->i_res;
1568     u32 extlen;
1569     u32 free_blocks, blocks_available;
1570     int ret;
1571     struct inode *inode = &ip->i_inode;
1572 
1573     spin_lock(&rgd->rd_rsspin);
1574     free_blocks = rgd_free(rgd, rs);
1575     if (rgd->rd_free_clone < rgd->rd_requested)
1576         free_blocks = 0;
1577     blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
1578     if (rgd == rs->rs_rgd)
1579         blocks_available += rs->rs_reserved;
1580     spin_unlock(&rgd->rd_rsspin);
1581 
1582     if (S_ISDIR(inode->i_mode))
1583         extlen = 1;
1584     else {
1585         extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
1586         extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
1587     }
1588     if (free_blocks < extlen || blocks_available < extlen)
1589         return;
1590 
1591     /* Find bitmap block that contains bits for goal block */
1592     if (rgrp_contains_block(rgd, ip->i_goal))
1593         goal = ip->i_goal;
1594     else
1595         goal = rgd->rd_last_alloc + rgd->rd_data0;
1596 
1597     if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1598         return;
1599 
1600     ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true);
1601     if (ret == 0) {
1602         rs->rs_start = gfs2_rbm_to_block(&rbm);
1603         rs->rs_requested = extlen;
1604         rs_insert(ip);
1605     } else {
1606         if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1607             rgd->rd_last_alloc = 0;
1608     }
1609 }
1610 
1611 /**
1612  * gfs2_next_unreserved_block - Return next block that is not reserved
1613  * @rgd: The resource group
1614  * @block: The starting block
1615  * @length: The required length
1616  * @ignore_rs: Reservation to ignore
1617  *
1618  * If the block does not appear in any reservation, then return the
1619  * block number unchanged. If it does appear in the reservation, then
1620  * keep looking through the tree of reservations in order to find the
1621  * first block number which is not reserved.
1622  */
1623 
1624 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1625                       u32 length,
1626                       struct gfs2_blkreserv *ignore_rs)
1627 {
1628     struct gfs2_blkreserv *rs;
1629     struct rb_node *n;
1630     int rc;
1631 
1632     spin_lock(&rgd->rd_rsspin);
1633     n = rgd->rd_rstree.rb_node;
1634     while (n) {
1635         rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1636         rc = rs_cmp(block, length, rs);
1637         if (rc < 0)
1638             n = n->rb_left;
1639         else if (rc > 0)
1640             n = n->rb_right;
1641         else
1642             break;
1643     }
1644 
1645     if (n) {
1646         while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) {
1647             block = rs->rs_start + rs->rs_requested;
1648             n = n->rb_right;
1649             if (n == NULL)
1650                 break;
1651             rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1652         }
1653     }
1654 
1655     spin_unlock(&rgd->rd_rsspin);
1656     return block;
1657 }
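
/*
 * Illustrative sketch of the interval comparison the tree walk above
 * relies on (assumed semantics of rs_cmp, simplified): a block range
 * orders before a reservation when it ends before the reservation
 * starts, after it when it starts past the reservation's end, and
 * compares equal (overlapping) otherwise.  demo_range_cmp is a
 * hypothetical stand-in, not the kernel's rs_cmp.
 */
static int demo_range_cmp(u64 start, u32 len, u64 rs_start, u32 rs_len)
{
	if (start + len <= rs_start)
		return -1;	/* range lies entirely before the reservation */
	if (start >= rs_start + rs_len)
		return 1;	/* range lies entirely after the reservation */
	return 0;		/* the two ranges overlap */
}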
1658 
1659 /**
1660  * gfs2_reservation_check_and_update - Check for reservations during block alloc
1661  * @rbm: The current position in the resource group
1662  * @rs: Our own reservation
1663  * @minext: The minimum extent length
1664  * @maxext: A pointer to the maximum extent structure
1665  *
1666  * This checks the current position in the rgrp to see whether there is
1667  * a reservation covering this block. If not then this function is a
1668  * no-op. If there is, then the position is moved to the end of the
1669  * contiguous reservation(s) so that we are pointing at the first
1670  * non-reserved block.
1671  *
1672  * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1673  */
1674 
1675 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1676                          struct gfs2_blkreserv *rs,
1677                          u32 minext,
1678                          struct gfs2_extent *maxext)
1679 {
1680     u64 block = gfs2_rbm_to_block(rbm);
1681     u32 extlen = 1;
1682     u64 nblock;
1683 
1684     /*
1685      * If we have a minimum extent length, then skip over any extent
1686      * which is less than the min extent length in size.
1687      */
1688     if (minext > 1) {
1689         extlen = gfs2_free_extlen(rbm, minext);
1690         if (extlen <= maxext->len)
1691             goto fail;
1692     }
1693 
1694     /*
1695      * Check the extent which has been found against the reservations
1696      * and skip if parts of it are already reserved
1697      */
1698     nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs);
1699     if (nblock == block) {
1700         if (!minext || extlen >= minext)
1701             return 0;
1702 
1703         if (extlen > maxext->len) {
1704             maxext->len = extlen;
1705             maxext->rbm = *rbm;
1706         }
1707     } else {
1708         u64 len = nblock - block;
1709         if (len >= (u64)1 << 32)
1710             return -E2BIG;
1711         extlen = len;
1712     }
1713 fail:
1714     if (gfs2_rbm_add(rbm, extlen))
1715         return -E2BIG;
1716     return 1;
1717 }
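
/*
 * Worked example of the three outcomes above: if the block at @rbm is
 * covered by no reservation and the free extent is long enough, @rbm is
 * left alone and 0 is returned; if a reservation (or a too-short
 * extent) is in the way, @rbm is advanced past it and 1 is returned so
 * the caller keeps scanning; if advancing would run off the end of the
 * rgrp, -E2BIG tells the caller to wrap back to the first bitmap.
 */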
1718 
1719 /**
1720  * gfs2_rbm_find - Look for blocks of a particular state
1721  * @rbm: Value/result starting position and final position
1722  * @state: The state which we want to find
1723  * @minext: Pointer to the requested extent length
1724  *          This is updated to be the actual reservation size.
1725  * @rs: Our own reservation (NULL to skip checking for reservations)
1726  * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1727  *          around until we've reached the starting point.
1728  *
1729  * Side effects:
1730  * - If looking for free blocks, we set GBF_FULL on each bitmap which
1731  *   has no free blocks in it.
1732  * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1733  *   has come up short on a free block search.
1734  *
1735  * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1736  */
1737 
1738 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1739              struct gfs2_blkreserv *rs, bool nowrap)
1740 {
1741     bool scan_from_start = rbm->bii == 0 && rbm->offset == 0;
1742     struct buffer_head *bh;
1743     int last_bii;
1744     u32 offset;
1745     u8 *buffer;
1746     bool wrapped = false;
1747     int ret;
1748     struct gfs2_bitmap *bi;
1749     struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1750 
1751     /*
1752      * Determine the last bitmap to search.  If we're not starting at the
1753      * beginning of a bitmap, we need to search that bitmap twice to scan
1754      * the entire resource group.
1755      */
1756     last_bii = rbm->bii - (rbm->offset == 0);
1757 
1758     while (1) {
1759         bi = rbm_bi(rbm);
1760         if (test_bit(GBF_FULL, &bi->bi_flags) &&
1761             (state == GFS2_BLKST_FREE))
1762             goto next_bitmap;
1763 
1764         bh = bi->bi_bh;
1765         buffer = bh->b_data + bi->bi_offset;
1766         WARN_ON(!buffer_uptodate(bh));
1767         if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1768             buffer = bi->bi_clone + bi->bi_offset;
1769         offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state);
1770         if (offset == BFITNOENT) {
1771             if (state == GFS2_BLKST_FREE && rbm->offset == 0)
1772                 set_bit(GBF_FULL, &bi->bi_flags);
1773             goto next_bitmap;
1774         }
1775         rbm->offset = offset;
1776         if (!rs || !minext)
1777             return 0;
1778 
1779         ret = gfs2_reservation_check_and_update(rbm, rs, *minext,
1780                             &maxext);
1781         if (ret == 0)
1782             return 0;
1783         if (ret > 0)
1784             goto next_iter;
1785         if (ret == -E2BIG) {
1786             rbm->bii = 0;
1787             rbm->offset = 0;
1788             goto res_covered_end_of_rgrp;
1789         }
1790         return ret;
1791 
1792 next_bitmap:    /* Find next bitmap in the rgrp */
1793         rbm->offset = 0;
1794         rbm->bii++;
1795         if (rbm->bii == rbm->rgd->rd_length)
1796             rbm->bii = 0;
1797 res_covered_end_of_rgrp:
1798         if (rbm->bii == 0) {
1799             if (wrapped)
1800                 break;
1801             wrapped = true;
1802             if (nowrap)
1803                 break;
1804         }
1805 next_iter:
1806         /* Have we scanned the entire resource group? */
1807         if (wrapped && rbm->bii > last_bii)
1808             break;
1809     }
1810 
1811     if (state != GFS2_BLKST_FREE)
1812         return -ENOSPC;
1813 
1814     /* If the extent was too small, and it's smaller than the smallest
1815        to have failed before, remember for future reference that it's
1816        useless to search this rgrp again for this amount or more. */
1817     if (wrapped && (scan_from_start || rbm->bii > last_bii) &&
1818         *minext < rbm->rgd->rd_extfail_pt)
1819         rbm->rgd->rd_extfail_pt = *minext - 1;
1820 
1821     /* If the maximum extent we found is big enough to fulfill the
1822        minimum requirements, use it anyway. */
1823     if (maxext.len) {
1824         *rbm = maxext.rbm;
1825         *minext = maxext.len;
1826         return 0;
1827     }
1828 
1829     return -ENOSPC;
1830 }
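
/*
 * A minimal sketch of the wrap-around scan implemented above, reduced
 * to a flat walk over N bitmaps: start from the current bitmap, wrap
 * to bitmap 0 at the end, and give up once every bitmap has been
 * visited.  All names here are illustrative; the real code also
 * rescans a partially searched first bitmap (see last_bii above).
 */
static int demo_wrapping_scan(int start_bii, int n_bitmaps,
			      bool (*scan_one)(int bii))
{
	int i, bii = start_bii;

	for (i = 0; i < n_bitmaps; i++) {
		if (scan_one(bii))
			return bii;	/* found a match in this bitmap */
		if (++bii == n_bitmaps)
			bii = 0;	/* wrap around to the first bitmap */
	}
	return -1;			/* scanned every bitmap, no match */
}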
1831 
1832 /**
1833  * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1834  * @rgd: The rgrp
1835  * @last_unlinked: block address of the last dinode we unlinked
1836  * @skip: block address we should explicitly not unlink
1837  *
1838  * Returns nothing.  Any unlinked inodes that are found have delete
1839  * work queued on their iopen glocks so that they can be reclaimed.
1840  */
1841 
1842 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1843 {
1844     u64 block;
1845     struct gfs2_sbd *sdp = rgd->rd_sbd;
1846     struct gfs2_glock *gl;
1847     struct gfs2_inode *ip;
1848     int error;
1849     int found = 0;
1850     struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1851 
1852     while (1) {
1853         error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1854                       true);
1855         if (error == -ENOSPC)
1856             break;
1857         if (WARN_ON_ONCE(error))
1858             break;
1859 
1860         block = gfs2_rbm_to_block(&rbm);
1861         if (gfs2_rbm_from_block(&rbm, block + 1))
1862             break;
1863         if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1864             continue;
1865         if (block == skip)
1866             continue;
1867         *last_unlinked = block;
1868 
1869         error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
1870         if (error)
1871             continue;
1872 
1873         /* If the inode is already in cache, we can ignore it here
1874          * because the existing inode disposal code will deal with
1875          * it when all refs have gone away. Accessing gl_object like
1876          * this is not safe in general. Here it is ok because we do
1877          * not dereference the pointer, and we only need an approx
1878          * answer to whether it is NULL or not.
1879          */
1880         ip = gl->gl_object;
1881 
1882         if (ip || !gfs2_queue_delete_work(gl, 0))
1883             gfs2_glock_put(gl);
1884         else
1885             found++;
1886 
1887         /* Limit reclaim to sensible number of tasks */
1888         if (found > NR_CPUS)
1889             return;
1890     }
1891 
1892     rgd->rd_flags &= ~GFS2_RDF_CHECK;
1893     return;
1894 }
1895 
1896 /**
1897  * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1898  * @rgd: The rgrp in question
1899  * @loops: An indication of how picky we can be (0=very, 1=less so)
1900  *
1901  * This function uses the recently added glock statistics in order to
1902  * figure out whether a particular resource group is suffering from
1903  * contention from multiple nodes. This is done purely on the basis
1904  * of timings, since this is the only data we have to work with and
1905  * our aim here is to reject a resource group which is highly contended
1906  * but (very importantly) not to do this too often, in order to ensure
1907  * that we do not end up introducing fragmentation by changing resource
1908  * groups when not actually required.
1909  *
1910  * The calculation is fairly simple, we want to know whether the SRTTB
1911  * (i.e. smoothed round trip time for blocking operations) to acquire
1912  * the lock for this rgrp's glock is significantly greater than the
1913  * time taken for resource groups on average. We introduce a margin in
1914  * the form of the variable @var which is computed as the sum of the two
1915  * respective variances, and multiplied by a factor depending on @loops
1916  * and whether we have a lot of data to base the decision on. This is
1917  * then tested against the square difference of the means in order to
1918  * decide whether the result is statistically significant or not.
1919  *
1920  * Returns: A boolean verdict on the congestion status
1921  */
1922 
1923 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1924 {
1925     const struct gfs2_glock *gl = rgd->rd_gl;
1926     const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1927     struct gfs2_lkstats *st;
1928     u64 r_dcount, l_dcount;
1929     u64 l_srttb, a_srttb = 0;
1930     s64 srttb_diff;
1931     u64 sqr_diff;
1932     u64 var;
1933     int cpu, nonzero = 0;
1934 
1935     preempt_disable();
1936     for_each_present_cpu(cpu) {
1937         st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
1938         if (st->stats[GFS2_LKS_SRTTB]) {
1939             a_srttb += st->stats[GFS2_LKS_SRTTB];
1940             nonzero++;
1941         }
1942     }
1943     st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1944     if (nonzero)
1945         do_div(a_srttb, nonzero);
1946     r_dcount = st->stats[GFS2_LKS_DCOUNT];
1947     var = st->stats[GFS2_LKS_SRTTVARB] +
1948           gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1949     preempt_enable();
1950 
1951     l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1952     l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1953 
1954     if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
1955         return false;
1956 
1957     srttb_diff = a_srttb - l_srttb;
1958     sqr_diff = srttb_diff * srttb_diff;
1959 
1960     var *= 2;
1961     if (l_dcount < 8 || r_dcount < 8)
1962         var *= 2;
1963     if (loops == 1)
1964         var *= 2;
1965 
1966     return ((srttb_diff < 0) && (sqr_diff > var));
1967 }
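
/*
 * Worked example of the significance test above (numbers illustrative):
 * with an average SRTTB over all rgrps of a_srttb = 100 and this
 * rgrp's l_srttb = 150, srttb_diff = -50 and sqr_diff = 2500.  If the
 * summed variances give var = 800, the baseline doubling makes
 * var = 1600 and the rgrp is judged congested (2500 > 1600).  With
 * fewer than 8 samples var doubles again to 3200 and the same numbers
 * no longer count as congested, which is the intended caution when
 * there is little data.
 */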
1968 
1969 /**
1970  * gfs2_rgrp_used_recently - check whether an rgrp glock was used recently
1971  * @rs: The block reservation with the rgrp to test
1972  * @msecs: The time limit in milliseconds
1973  *
1974  * Returns: True if the rgrp glock has been used within the time limit
1975  */
1976 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1977                     u64 msecs)
1978 {
1979     u64 tdiff;
1980 
1981     tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1982                             rs->rs_rgd->rd_gl->gl_dstamp));
1983 
1984     return tdiff > (msecs * 1000 * 1000);
1985 }
1986 
1987 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1988 {
1989     const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1990     u32 skip;
1991 
1992     get_random_bytes(&skip, sizeof(skip));
1993     return skip % sdp->sd_rgrps;
1994 }
1995 
1996 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1997 {
1998     struct gfs2_rgrpd *rgd = *pos;
1999     struct gfs2_sbd *sdp = rgd->rd_sbd;
2000 
2001     rgd = gfs2_rgrpd_get_next(rgd);
2002     if (rgd == NULL)
2003         rgd = gfs2_rgrpd_get_first(sdp);
2004     *pos = rgd;
2005     if (rgd != begin) /* If we didn't wrap */
2006         return true;
2007     return false;
2008 }
2009 
2010 /**
2011  * fast_to_acquire - determine if a resource group will be fast to acquire
2012  * @rgd: The rgrp
2013  *
2014  * If this is one of our preferred rgrps, it should be quicker to acquire,
2015  * because we tried to set ourselves up as the dlm lock master.
2016  */
2017 static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
2018 {
2019     struct gfs2_glock *gl = rgd->rd_gl;
2020 
2021     if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
2022         !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
2023         !test_bit(GLF_DEMOTE, &gl->gl_flags))
2024         return 1;
2025     if (rgd->rd_flags & GFS2_RDF_PREFERRED)
2026         return 1;
2027     return 0;
2028 }
2029 
2030 /**
2031  * gfs2_inplace_reserve - Reserve space in the filesystem
2032  * @ip: the inode to reserve space for
2033  * @ap: the allocation parameters
2034  *
2035  * We try our best to find an rgrp that has at least ap->target blocks
2036  * available. After a couple of passes (loops == 2), the prospects of finding
2037  * such an rgrp diminish. At this stage, we return the first rgrp that has
2038  * at least ap->min_target blocks available.
2039  *
2040  * Returns: 0 on success,
2041  *          -ENOSPC if a suitable rgrp can't be found
2042  *          errno otherwise
2043  */
2044 
2045 int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
2046 {
2047     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2048     struct gfs2_rgrpd *begin = NULL;
2049     struct gfs2_blkreserv *rs = &ip->i_res;
2050     int error = 0, flags = LM_FLAG_NODE_SCOPE;
2051     bool rg_locked;
2052     u64 last_unlinked = NO_BLOCK;
2053     u32 target = ap->target;
2054     int loops = 0;
2055     u32 free_blocks, blocks_available, skip = 0;
2056 
2057     BUG_ON(rs->rs_reserved);
2058 
2059     if (sdp->sd_args.ar_rgrplvb)
2060         flags |= GL_SKIP;
2061     if (gfs2_assert_warn(sdp, target))
2062         return -EINVAL;
2063     if (gfs2_rs_active(rs)) {
2064         begin = rs->rs_rgd;
2065     } else if (rs->rs_rgd &&
2066            rgrp_contains_block(rs->rs_rgd, ip->i_goal)) {
2067         begin = rs->rs_rgd;
2068     } else {
2069         check_and_update_goal(ip);
2070         rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
2071     }
2072     if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
2073         skip = gfs2_orlov_skip(ip);
2074     if (rs->rs_rgd == NULL)
2075         return -EBADSLT;
2076 
2077     while (loops < 3) {
2078         struct gfs2_rgrpd *rgd;
2079 
2080         rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
2081         if (rg_locked) {
2082             rgrp_lock_local(rs->rs_rgd);
2083         } else {
2084             if (skip && skip--)
2085                 goto next_rgrp;
2086             if (!gfs2_rs_active(rs)) {
2087                 if (loops == 0 &&
2088                     !fast_to_acquire(rs->rs_rgd))
2089                     goto next_rgrp;
2090                 if ((loops < 2) &&
2091                     gfs2_rgrp_used_recently(rs, 1000) &&
2092                     gfs2_rgrp_congested(rs->rs_rgd, loops))
2093                     goto next_rgrp;
2094             }
2095             error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
2096                            LM_ST_EXCLUSIVE, flags,
2097                            &ip->i_rgd_gh);
2098             if (unlikely(error))
2099                 return error;
2100             rgrp_lock_local(rs->rs_rgd);
2101             if (!gfs2_rs_active(rs) && (loops < 2) &&
2102                 gfs2_rgrp_congested(rs->rs_rgd, loops))
2103                 goto skip_rgrp;
2104             if (sdp->sd_args.ar_rgrplvb) {
2105                 error = update_rgrp_lvb(rs->rs_rgd,
2106                             &ip->i_rgd_gh);
2107                 if (unlikely(error)) {
2108                     rgrp_unlock_local(rs->rs_rgd);
2109                     gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2110                     return error;
2111                 }
2112             }
2113         }
2114 
2115         /* Skip unusable resource groups */
2116         if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC |
2117                          GFS2_RDF_ERROR)) ||
2118             (loops == 0 && target > rs->rs_rgd->rd_extfail_pt))
2119             goto skip_rgrp;
2120 
2121         if (sdp->sd_args.ar_rgrplvb) {
2122             error = gfs2_instantiate(&ip->i_rgd_gh);
2123             if (error)
2124                 goto skip_rgrp;
2125         }
2126 
2127         /* Get a reservation if we don't already have one */
2128         if (!gfs2_rs_active(rs))
2129             rg_mblk_search(rs->rs_rgd, ip, ap);
2130 
2131         /* Skip rgrps when we can't get a reservation on first pass */
2132         if (!gfs2_rs_active(rs) && (loops < 1))
2133             goto check_rgrp;
2134 
2135         /* If rgrp has enough free space, use it */
2136         rgd = rs->rs_rgd;
2137         spin_lock(&rgd->rd_rsspin);
2138         free_blocks = rgd_free(rgd, rs);
2139         blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
2140         if (free_blocks < target || blocks_available < target) {
2141             spin_unlock(&rgd->rd_rsspin);
2142             goto check_rgrp;
2143         }
2144         rs->rs_reserved = ap->target;
2145         if (rs->rs_reserved > blocks_available)
2146             rs->rs_reserved = blocks_available;
2147         rgd->rd_reserved += rs->rs_reserved;
2148         spin_unlock(&rgd->rd_rsspin);
2149         rgrp_unlock_local(rs->rs_rgd);
2150         return 0;
2151 check_rgrp:
2152         /* Check for unlinked inodes which can be reclaimed */
2153         if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK)
2154             try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
2155                     ip->i_no_addr);
2156 skip_rgrp:
2157         rgrp_unlock_local(rs->rs_rgd);
2158 
2159         /* Drop reservation, if we couldn't use reserved rgrp */
2160         if (gfs2_rs_active(rs))
2161             gfs2_rs_deltree(rs);
2162 
2163         /* Unlock rgrp if required */
2164         if (!rg_locked)
2165             gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2166 next_rgrp:
2167         /* Find the next rgrp, and continue looking */
2168         if (gfs2_select_rgrp(&rs->rs_rgd, begin))
2169             continue;
2170         if (skip)
2171             continue;
2172 
2173         /* If we've scanned all the rgrps, but found no free blocks
2174          * then this checks for some less likely conditions before
2175          * trying again.
2176          */
2177         loops++;
2178         /* Check that fs hasn't grown if writing to rindex */
2179         if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2180             error = gfs2_ri_update(ip);
2181             if (error)
2182                 return error;
2183         }
2184         /* Flushing the log may release space */
2185         if (loops == 2) {
2186             if (ap->min_target)
2187                 target = ap->min_target;
2188             gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
2189                        GFS2_LFC_INPLACE_RESERVE);
2190         }
2191     }
2192 
2193     return -ENOSPC;
2194 }
2195 
2196 /**
2197  * gfs2_inplace_release - release an inplace reservation
2198  * @ip: the inode the reservation was taken out on
2199  *
2200  * Release a reservation made by gfs2_inplace_reserve().
2201  */
2202 
2203 void gfs2_inplace_release(struct gfs2_inode *ip)
2204 {
2205     struct gfs2_blkreserv *rs = &ip->i_res;
2206 
2207     if (rs->rs_reserved) {
2208         struct gfs2_rgrpd *rgd = rs->rs_rgd;
2209 
2210         spin_lock(&rgd->rd_rsspin);
2211         GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved < rs->rs_reserved);
2212         rgd->rd_reserved -= rs->rs_reserved;
2213         spin_unlock(&rgd->rd_rsspin);
2214         rs->rs_reserved = 0;
2215     }
2216     if (gfs2_holder_initialized(&ip->i_rgd_gh))
2217         gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2218 }
2219 
2220 /**
2221  * gfs2_alloc_extent - allocate an extent from a given bitmap
2222  * @rbm: the position of the first block of the extent
2223  * @dinode: TRUE if the first block we allocate is for a dinode
2224  * @n: The extent length (value/result)
2225  *
2226  * Add the bitmap buffer to the transaction.
2227  * Set the found bits to the new state to change the blocks' allocation state.
2228  */
2229 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2230                  unsigned int *n)
2231 {
2232     struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2233     const unsigned int elen = *n;
2234     u64 block;
2235     int ret;
2236 
2237     *n = 1;
2238     block = gfs2_rbm_to_block(rbm);
2239     gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2240     gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2241     block++;
2242     while (*n < elen) {
2243         ret = gfs2_rbm_from_block(&pos, block);
2244         if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
2245             break;
2246         gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2247         gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2248         (*n)++;
2249         block++;
2250     }
2251 }
2252 
2253 /**
2254  * rgblk_free - Change alloc state of given block(s)
2255  * @sdp: the filesystem
2256  * @rgd: the resource group the blocks are in
2257  * @bstart: the start of a run of blocks to free
2258  * @blen: the length of the block run (all must lie within ONE RG!)
2259  * @new_state: GFS2_BLKST_XXX the after-allocation block state
2260  */
2261 
2262 static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
2263                u64 bstart, u32 blen, unsigned char new_state)
2264 {
2265     struct gfs2_rbm rbm;
2266     struct gfs2_bitmap *bi, *bi_prev = NULL;
2267 
2268     rbm.rgd = rgd;
2269     if (WARN_ON_ONCE(gfs2_rbm_from_block(&rbm, bstart)))
2270         return;
2271     while (blen--) {
2272         bi = rbm_bi(&rbm);
2273         if (bi != bi_prev) {
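            /* First change to this bitmap buffer: snapshot its current
             * contents into bi_clone.  Block searches use the clone
             * when present (see gfs2_rbm_find), so just-freed blocks
             * are not handed out again before the free is committed. */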
2274             if (!bi->bi_clone) {
2275                 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2276                               GFP_NOFS | __GFP_NOFAIL);
2277                 memcpy(bi->bi_clone + bi->bi_offset,
2278                        bi->bi_bh->b_data + bi->bi_offset,
2279                        bi->bi_bytes);
2280             }
2281             gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2282             bi_prev = bi;
2283         }
2284         gfs2_setbit(&rbm, false, new_state);
2285         gfs2_rbm_add(&rbm, 1);
2286     }
2287 }
2288 
2289 /**
2290  * gfs2_rgrp_dump - print out an rgrp
2291  * @seq: The iterator
2292  * @rgd: The rgrp in question
2293  * @fs_id_buf: pointer to file system id (if requested)
2294  *
2295  */
2296 
2297 void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
2298             const char *fs_id_buf)
2299 {
2300     struct gfs2_blkreserv *trs;
2301     const struct rb_node *n;
2302 
2303     spin_lock(&rgd->rd_rsspin);
2304     gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
2305                fs_id_buf,
2306                (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2307                rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2308                rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
2309     if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
2310         struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
2311 
2312         gfs2_print_dbg(seq, "%s  L: f:%02x b:%u i:%u\n", fs_id_buf,
2313                    be32_to_cpu(rgl->rl_flags),
2314                    be32_to_cpu(rgl->rl_free),
2315                    be32_to_cpu(rgl->rl_dinodes));
2316     }
2317     for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2318         trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2319         dump_rs(seq, trs, fs_id_buf);
2320     }
2321     spin_unlock(&rgd->rd_rsspin);
2322 }
2323 
2324 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2325 {
2326     struct gfs2_sbd *sdp = rgd->rd_sbd;
2327     char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
2328 
2329     fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2330         (unsigned long long)rgd->rd_addr);
2331     fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2332     sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
2333     gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
2334     rgd->rd_flags |= GFS2_RDF_ERROR;
2335 }
2336 
2337 /**
2338  * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2339  * @ip: The inode we have just allocated blocks for
2340  * @rbm: The start of the allocated blocks
2341  * @len: The extent length
2342  *
2343  * Adjusts a reservation after an allocation has taken place. If the
2344  * reservation does not match the allocation, or if it is now empty
2345  * then it is removed.
2346  */
2347 
2348 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2349                     const struct gfs2_rbm *rbm, unsigned len)
2350 {
2351     struct gfs2_blkreserv *rs = &ip->i_res;
2352     struct gfs2_rgrpd *rgd = rbm->rgd;
2353 
2354     BUG_ON(rs->rs_reserved < len);
2355     rs->rs_reserved -= len;
2356     if (gfs2_rs_active(rs)) {
2357         u64 start = gfs2_rbm_to_block(rbm);
2358 
2359         if (rs->rs_start == start) {
2360             unsigned int rlen;
2361 
2362             rs->rs_start += len;
2363             rlen = min(rs->rs_requested, len);
2364             rs->rs_requested -= rlen;
2365             rgd->rd_requested -= rlen;
2366             trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2367             if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
2368                 rs->rs_requested)
2369                 return;
2370             /* We used up our block reservation, so we should
2371                reserve more blocks next time. */
2372             atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
2373         }
2374         __rs_deltree(rs);
2375     }
2376 }
2377 
2378 /**
2379  * gfs2_set_alloc_start - Set starting point for block allocation
2380  * @rbm: The rbm which will be set to the required location
2381  * @ip: The gfs2 inode
2382  * @dinode: Flag to say if allocation includes a new inode
2383  *
2384  * This sets the starting point from the reservation if one is active
2385  * otherwise it falls back to guessing a start point based on the
2386  * inode's goal block or the last allocation point in the rgrp.
2387  */
2388 
2389 static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2390                  const struct gfs2_inode *ip, bool dinode)
2391 {
2392     u64 goal;
2393 
2394     if (gfs2_rs_active(&ip->i_res)) {
2395         goal = ip->i_res.rs_start;
2396     } else {
2397         if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2398             goal = ip->i_goal;
2399         else
2400             goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2401     }
2402     if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
2403         rbm->bii = 0;
2404         rbm->offset = 0;
2405     }
2406 }
2407 
2408 /**
2409  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2410  * @ip: the inode to allocate the block for
2411  * @bn: Used to return the starting block number
2412  * @nblocks: requested number of blocks/extent length (value/result)
2413  * @dinode: 1 if we're allocating a dinode block, else 0
2414  * @generation: the generation number of the inode
2415  *
2416  * Returns: 0 or error
2417  */
2418 
2419 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2420               bool dinode, u64 *generation)
2421 {
2422     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2423     struct buffer_head *dibh;
2424     struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, };
2425     u64 block; /* block, within the file system scope */
2426     u32 minext = 1;
2427     int error = -ENOSPC;
2428 
2429     BUG_ON(ip->i_res.rs_reserved < *nblocks);
2430 
2431     rgrp_lock_local(rbm.rgd);
2432     if (gfs2_rs_active(&ip->i_res)) {
2433         gfs2_set_alloc_start(&rbm, ip, dinode);
2434         error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
2435     }
2436     if (error == -ENOSPC) {
2437         gfs2_set_alloc_start(&rbm, ip, dinode);
2438         error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false);
2439     }
2440 
2441     /* Since all blocks are reserved in advance, this shouldn't happen */
2442     if (error) {
2443         fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2444             (unsigned long long)ip->i_no_addr, error, *nblocks,
2445             test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2446             rbm.rgd->rd_extfail_pt);
2447         goto rgrp_error;
2448     }
2449 
2450     gfs2_alloc_extent(&rbm, dinode, nblocks);
2451     block = gfs2_rbm_to_block(&rbm);
2452     rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2453     if (!dinode) {
2454         ip->i_goal = block + *nblocks - 1;
2455         error = gfs2_meta_inode_buffer(ip, &dibh);
2456         if (error == 0) {
2457             struct gfs2_dinode *di =
2458                 (struct gfs2_dinode *)dibh->b_data;
2459             gfs2_trans_add_meta(ip->i_gl, dibh);
2460             di->di_goal_meta = di->di_goal_data =
2461                 cpu_to_be64(ip->i_goal);
2462             brelse(dibh);
2463         }
2464     }
2465     spin_lock(&rbm.rgd->rd_rsspin);
2466     gfs2_adjust_reservation(ip, &rbm, *nblocks);
2467     if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) {
2468         fs_warn(sdp, "nblocks=%u\n", *nblocks);
2469         spin_unlock(&rbm.rgd->rd_rsspin);
2470         goto rgrp_error;
2471     }
2472     GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_reserved < *nblocks);
2473     GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free_clone < *nblocks);
2474     GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free < *nblocks);
2475     rbm.rgd->rd_reserved -= *nblocks;
2476     rbm.rgd->rd_free_clone -= *nblocks;
2477     rbm.rgd->rd_free -= *nblocks;
2478     spin_unlock(&rbm.rgd->rd_rsspin);
2479     if (dinode) {
2480         rbm.rgd->rd_dinodes++;
2481         *generation = rbm.rgd->rd_igeneration++;
2482         if (*generation == 0)
2483             *generation = rbm.rgd->rd_igeneration++;
2484     }
2485 
2486     gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2487     gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2488     rgrp_unlock_local(rbm.rgd);
2489 
2490     gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2491     if (dinode)
2492         gfs2_trans_remove_revoke(sdp, block, *nblocks);
2493 
2494     gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2495 
2496     trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2497                    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2498     *bn = block;
2499     return 0;
2500 
2501 rgrp_error:
2502     rgrp_unlock_local(rbm.rgd);
2503     gfs2_rgrp_error(rbm.rgd);
2504     return -EIO;
2505 }
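
/*
 * A hedged sketch (not a real GFS2 caller) of how the allocation entry
 * points above fit together.  The transaction, quota and inode glock
 * handling that real callers perform around this sequence is elided.
 */
static int demo_allocate_data_blocks(struct gfs2_inode *ip,
				     unsigned int blks_wanted, u64 *bn)
{
	struct gfs2_alloc_parms ap = { .target = blks_wanted, };
	unsigned int n = blks_wanted;
	int error;

	error = gfs2_inplace_reserve(ip, &ap);
	if (error)
		return error;
	/* n is value/result: on return it holds the number of blocks
	 * actually allocated, which may be less than requested. */
	error = gfs2_alloc_blocks(ip, bn, &n, false, NULL);
	gfs2_inplace_release(ip);
	return error;
}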
2506 
2507 /**
2508  * __gfs2_free_blocks - free a contiguous run of block(s)
2509  * @ip: the inode these blocks are being freed from
2510  * @rgd: the resource group the blocks are in
2511  * @bstart: first block of a run of contiguous blocks
2512  * @blen: the length of the block run
2513  * @meta: 1 if the blocks represent metadata
2514  *
2515  */
2516 
2517 void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2518             u64 bstart, u32 blen, int meta)
2519 {
2520     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2521 
2522     rgrp_lock_local(rgd);
2523     rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
2524     trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2525     rgd->rd_free += blen;
2526     rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2527     gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2528     gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2529     rgrp_unlock_local(rgd);
2530 
2531     /* Directories keep their data in the metadata address space */
2532     if (meta || ip->i_depth || gfs2_is_jdata(ip))
2533         gfs2_journal_wipe(ip, bstart, blen);
2534 }
2535 
2536 /**
2537  * gfs2_free_meta - free a contiguous run of metadata block(s)
2538  * @ip: the inode these blocks are being freed from
2539  * @rgd: the resource group the blocks are in
2540  * @bstart: first block of a run of contiguous blocks
2541  * @blen: the length of the block run
2542  *
2543  */
2544 
2545 void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2546             u64 bstart, u32 blen)
2547 {
2548     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2549 
2550     __gfs2_free_blocks(ip, rgd, bstart, blen, 1);
2551     gfs2_statfs_change(sdp, 0, +blen, 0);
2552     gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2553 }
2554 
2555 void gfs2_unlink_di(struct inode *inode)
2556 {
2557     struct gfs2_inode *ip = GFS2_I(inode);
2558     struct gfs2_sbd *sdp = GFS2_SB(inode);
2559     struct gfs2_rgrpd *rgd;
2560     u64 blkno = ip->i_no_addr;
2561 
2562     rgd = gfs2_blk2rgrpd(sdp, blkno, true);
2563     if (!rgd)
2564         return;
2565     rgrp_lock_local(rgd);
2566     rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2567     trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2568     gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2569     gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2570     be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
2571     rgrp_unlock_local(rgd);
2572 }
2573 
2574 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2575 {
2576     struct gfs2_sbd *sdp = rgd->rd_sbd;
2577 
2578     rgrp_lock_local(rgd);
2579     rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2580     if (!rgd->rd_dinodes)
2581         gfs2_consist_rgrpd(rgd);
2582     rgd->rd_dinodes--;
2583     rgd->rd_free++;
2584 
2585     gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2586     gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2587     rgrp_unlock_local(rgd);
2588     be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
2589 
2590     gfs2_statfs_change(sdp, 0, +1, -1);
2591     trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2592     gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2593     gfs2_journal_wipe(ip, ip->i_no_addr, 1);
2594 }
2595 
2596 /**
2597  * gfs2_check_blk_type - Check the type of a block
2598  * @sdp: The superblock
2599  * @no_addr: The block number to check
2600  * @type: The block type we are looking for
2601  *
2602  * The inode glock of @no_addr must be held.  The @type to check for is either
2603  * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
2604  * or GFS2_BLKST_USED would make no sense.
2605  *
2606  * Returns: 0 if the block type matches the expected type
2607  *          -ESTALE if it doesn't match
2608  *          or -ve errno if something went wrong while checking
2609  */
2610 
2611 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2612 {
2613     struct gfs2_rgrpd *rgd;
2614     struct gfs2_holder rgd_gh;
2615     struct gfs2_rbm rbm;
2616     int error = -EINVAL;
2617 
2618     rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2619     if (!rgd)
2620         goto fail;
2621 
2622     error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2623     if (error)
2624         goto fail;
2625 
2626     rbm.rgd = rgd;
2627     error = gfs2_rbm_from_block(&rbm, no_addr);
2628     if (!WARN_ON_ONCE(error)) {
2629         /*
2630          * No need to take the local resource group lock here; the
2631          * inode glock of @no_addr provides the necessary
2632          * synchronization in case the block is an inode.  (In case
2633          * the block is not an inode, the block type will not match
2634          * the @type we are looking for.)
2635          */
2636         if (gfs2_testbit(&rbm, false) != type)
2637             error = -ESTALE;
2638     }
2639 
2640     gfs2_glock_dq_uninit(&rgd_gh);
2641 
2642 fail:
2643     return error;
2644 }
2645 
2646 /**
2647  * gfs2_rlist_add - add a RG to a list of RGs
2648  * @ip: the inode
2649  * @rlist: the list of resource groups
2650  * @block: the block
2651  *
2652  * Figure out what RG a block belongs to and add that RG to the list
2653  *
2654  * FIXME: Don't use NOFAIL
2655  *
2656  */
2657 
2658 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2659             u64 block)
2660 {
2661     struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2662     struct gfs2_rgrpd *rgd;
2663     struct gfs2_rgrpd **tmp;
2664     unsigned int new_space;
2665     unsigned int x;
2666 
2667     if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2668         return;
2669 
2670     /*
2671      * The resource group last accessed is kept in the last position.
2672      */
2673 
2674     if (rlist->rl_rgrps) {
2675         rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
2676         if (rgrp_contains_block(rgd, block))
2677             return;
2678         rgd = gfs2_blk2rgrpd(sdp, block, 1);
2679     } else {
2680         rgd = ip->i_res.rs_rgd;
2681         if (!rgd || !rgrp_contains_block(rgd, block))
2682             rgd = gfs2_blk2rgrpd(sdp, block, 1);
2683     }
2684 
2685     if (!rgd) {
2686         fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
2687                (unsigned long long)block);
2688         return;
2689     }
2690 
2691     for (x = 0; x < rlist->rl_rgrps; x++) {
2692         if (rlist->rl_rgd[x] == rgd) {
2693             swap(rlist->rl_rgd[x],
2694                  rlist->rl_rgd[rlist->rl_rgrps - 1]);
2695             return;
2696         }
2697     }
2698 
2699     if (rlist->rl_rgrps == rlist->rl_space) {
2700         new_space = rlist->rl_space + 10;
2701 
2702         tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2703                   GFP_NOFS | __GFP_NOFAIL);
2704 
2705         if (rlist->rl_rgd) {
2706             memcpy(tmp, rlist->rl_rgd,
2707                    rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2708             kfree(rlist->rl_rgd);
2709         }
2710 
2711         rlist->rl_space = new_space;
2712         rlist->rl_rgd = tmp;
2713     }
2714 
2715     rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2716 }
2717 
2718 /**
2719  * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2720  *      and initialize an array of glock holders for them
2721  * @rlist: the list of resource groups
2722  * @state: the state we're requesting
2723  * @flags: the modifier flags
2724  *
2725  * FIXME: Don't use NOFAIL
2726  *
2727  */
2728 
2729 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist,
2730               unsigned int state, u16 flags)
2731 {
2732     unsigned int x;
2733 
2734     rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
2735                       sizeof(struct gfs2_holder),
2736                       GFP_NOFS | __GFP_NOFAIL);
2737     for (x = 0; x < rlist->rl_rgrps; x++)
2738         gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, state, flags,
2739                  &rlist->rl_ghs[x]);
2740 }
2741 
2742 /**
2743  * gfs2_rlist_free - free a resource group list
2744  * @rlist: the list of resource groups
2745  *
2746  */
2747 
2748 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2749 {
2750     unsigned int x;
2751 
2752     kfree(rlist->rl_rgd);
2753 
2754     if (rlist->rl_ghs) {
2755         for (x = 0; x < rlist->rl_rgrps; x++)
2756             gfs2_holder_uninit(&rlist->rl_ghs[x]);
2757         kfree(rlist->rl_ghs);
2758         rlist->rl_ghs = NULL;
2759     }
2760 }
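
/*
 * Hedged sketch of the gfs2_rgrp_list lifecycle implemented above
 * (glock enqueueing of rlist.rl_ghs, the actual frees and all error
 * handling are elided; deallocation paths are the real users):
 *
 *	struct gfs2_rgrp_list rlist = { 0 };
 *
 *	gfs2_rlist_add(ip, &rlist, block1);
 *	gfs2_rlist_add(ip, &rlist, block2);  (dedups, grows the array)
 *	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
 *	... enqueue rlist.rl_ghs, free blocks, dequeue the holders ...
 *	gfs2_rlist_free(&rlist);
 */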
2761 
2762 void rgrp_lock_local(struct gfs2_rgrpd *rgd)
2763 {
2764     mutex_lock(&rgd->rd_mutex);
2765 }
2766 
2767 void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
2768 {
2769     mutex_unlock(&rgd->rd_mutex);
2770 }