Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
0003  */
0004 
0005 /*
0006  *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
0007  *  Programm System Institute
0008  *  Pereslavl-Zalessky Russia
0009  */
0010 
0011 #include <linux/time.h>
0012 #include <linux/string.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/bio.h>
0015 #include "reiserfs.h"
0016 #include <linux/buffer_head.h>
0017 #include <linux/quotaops.h>
0018 
0019 /* Does the buffer contain a disk block which is in the tree. */
0020 inline int B_IS_IN_TREE(const struct buffer_head *bh)
0021 {
0022 
0023     RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
0024            "PAP-1010: block (%b) has too big level (%z)", bh, bh);
0025 
0026     return (B_LEVEL(bh) != FREE_LEVEL);
0027 }
0028 
0029 /* to get item head in le form */
0030 inline void copy_item_head(struct item_head *to,
0031                const struct item_head *from)
0032 {
0033     memcpy(to, from, IH_SIZE);
0034 }
0035 
0036 /*
0037  * k1 is pointer to on-disk structure which is stored in little-endian
0038  * form. k2 is pointer to cpu variable. For key of items of the same
0039  * object this returns 0.
0040  * Returns: -1 if key1 < key2
0041  * 0 if key1 == key2
0042  * 1 if key1 > key2
0043  */
0044 inline int comp_short_keys(const struct reiserfs_key *le_key,
0045                const struct cpu_key *cpu_key)
0046 {
0047     __u32 n;
0048     n = le32_to_cpu(le_key->k_dir_id);
0049     if (n < cpu_key->on_disk_key.k_dir_id)
0050         return -1;
0051     if (n > cpu_key->on_disk_key.k_dir_id)
0052         return 1;
0053     n = le32_to_cpu(le_key->k_objectid);
0054     if (n < cpu_key->on_disk_key.k_objectid)
0055         return -1;
0056     if (n > cpu_key->on_disk_key.k_objectid)
0057         return 1;
0058     return 0;
0059 }
0060 
0061 /*
0062  * k1 is pointer to on-disk structure which is stored in little-endian
0063  * form. k2 is pointer to cpu variable.
0064  * Compare keys using all 4 key fields.
0065  * Returns: -1 if key1 < key2 0
0066  * if key1 = key2 1 if key1 > key2
0067  */
0068 static inline int comp_keys(const struct reiserfs_key *le_key,
0069                 const struct cpu_key *cpu_key)
0070 {
0071     int retval;
0072 
0073     retval = comp_short_keys(le_key, cpu_key);
0074     if (retval)
0075         return retval;
0076     if (le_key_k_offset(le_key_version(le_key), le_key) <
0077         cpu_key_k_offset(cpu_key))
0078         return -1;
0079     if (le_key_k_offset(le_key_version(le_key), le_key) >
0080         cpu_key_k_offset(cpu_key))
0081         return 1;
0082 
0083     if (cpu_key->key_length == 3)
0084         return 0;
0085 
0086     /* this part is needed only when tail conversion is in progress */
0087     if (le_key_k_type(le_key_version(le_key), le_key) <
0088         cpu_key_k_type(cpu_key))
0089         return -1;
0090 
0091     if (le_key_k_type(le_key_version(le_key), le_key) >
0092         cpu_key_k_type(cpu_key))
0093         return 1;
0094 
0095     return 0;
0096 }
0097 
0098 inline int comp_short_le_keys(const struct reiserfs_key *key1,
0099                   const struct reiserfs_key *key2)
0100 {
0101     __u32 *k1_u32, *k2_u32;
0102     int key_length = REISERFS_SHORT_KEY_LEN;
0103 
0104     k1_u32 = (__u32 *) key1;
0105     k2_u32 = (__u32 *) key2;
0106     for (; key_length--; ++k1_u32, ++k2_u32) {
0107         if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
0108             return -1;
0109         if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
0110             return 1;
0111     }
0112     return 0;
0113 }
0114 
0115 inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
0116 {
0117     int version;
0118     to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
0119     to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
0120 
0121     /* find out version of the key */
0122     version = le_key_version(from);
0123     to->version = version;
0124     to->on_disk_key.k_offset = le_key_k_offset(version, from);
0125     to->on_disk_key.k_type = le_key_k_type(version, from);
0126 }
0127 
0128 /*
0129  * this does not say which one is bigger, it only returns 1 if keys
0130  * are not equal, 0 otherwise
0131  */
0132 inline int comp_le_keys(const struct reiserfs_key *k1,
0133             const struct reiserfs_key *k2)
0134 {
0135     return memcmp(k1, k2, sizeof(struct reiserfs_key));
0136 }
0137 
0138 /**************************************************************************
0139  *  Binary search toolkit function                                        *
0140  *  Search for an item in the array by the item key                       *
0141  *  Returns:    1 if found,  0 if not found;                              *
0142  *        *pos = number of the searched element if found, else the        *
0143  *        number of the first element that is larger than key.            *
0144  **************************************************************************/
0145 /*
0146  * For those not familiar with binary search: lbound is the leftmost item
0147  * that it could be, rbound the rightmost item that it could be.  We examine
0148  * the item halfway between lbound and rbound, and that tells us either
0149  * that we can increase lbound, or decrease rbound, or that we have found it,
0150  * or if lbound <= rbound that there are no possible items, and we have not
0151  * found it. With each examination we cut the number of possible items it
0152  * could be by one more than half rounded down, or we find it.
0153  */
0154 static inline int bin_search(const void *key,   /* Key to search for. */
0155                  const void *base,  /* First item in the array. */
0156                  int num,   /* Number of items in the array. */
0157                  /*
0158                   * Item size in the array.  searched. Lest the
0159                   * reader be confused, note that this is crafted
0160                   * as a general function, and when it is applied
0161                   * specifically to the array of item headers in a
0162                   * node, width is actually the item header size
0163                   * not the item size.
0164                   */
0165                  int width,
0166                  int *pos /* Number of the searched for element. */
0167     )
0168 {
0169     int rbound, lbound, j;
0170 
0171     for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
0172          lbound <= rbound; j = (rbound + lbound) / 2)
0173         switch (comp_keys
0174             ((struct reiserfs_key *)((char *)base + j * width),
0175              (struct cpu_key *)key)) {
0176         case -1:
0177             lbound = j + 1;
0178             continue;
0179         case 1:
0180             rbound = j - 1;
0181             continue;
0182         case 0:
0183             *pos = j;
0184             return ITEM_FOUND;  /* Key found in the array.  */
0185         }
0186 
0187     /*
0188      * bin_search did not find given key, it returns position of key,
0189      * that is minimal and greater than the given one.
0190      */
0191     *pos = lbound;
0192     return ITEM_NOT_FOUND;
0193 }
0194 
0195 
0196 /* Minimal possible key. It is never in the tree. */
0197 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
0198 
0199 /* Maximal possible key. It is never in the tree. */
0200 static const struct reiserfs_key MAX_KEY = {
0201     cpu_to_le32(0xffffffff),
0202     cpu_to_le32(0xffffffff),
0203     {{cpu_to_le32(0xffffffff),
0204       cpu_to_le32(0xffffffff)},}
0205 };
0206 
0207 /*
0208  * Get delimiting key of the buffer by looking for it in the buffers in the
0209  * path, starting from the bottom of the path, and going upwards.  We must
0210  * check the path's validity at each step.  If the key is not in the path,
0211  * there is no delimiting key in the tree (buffer is first or last buffer
0212  * in tree), and in this case we return a special key, either MIN_KEY or
0213  * MAX_KEY.
0214  */
0215 static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
0216                           const struct super_block *sb)
0217 {
0218     int position, path_offset = chk_path->path_length;
0219     struct buffer_head *parent;
0220 
0221     RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
0222            "PAP-5010: invalid offset in the path");
0223 
0224     /* While not higher in path than first element. */
0225     while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
0226 
0227         RFALSE(!buffer_uptodate
0228                (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
0229                "PAP-5020: parent is not uptodate");
0230 
0231         /* Parent at the path is not in the tree now. */
0232         if (!B_IS_IN_TREE
0233             (parent =
0234              PATH_OFFSET_PBUFFER(chk_path, path_offset)))
0235             return &MAX_KEY;
0236         /* Check whether position in the parent is correct. */
0237         if ((position =
0238              PATH_OFFSET_POSITION(chk_path,
0239                       path_offset)) >
0240             B_NR_ITEMS(parent))
0241             return &MAX_KEY;
0242         /* Check whether parent at the path really points to the child. */
0243         if (B_N_CHILD_NUM(parent, position) !=
0244             PATH_OFFSET_PBUFFER(chk_path,
0245                     path_offset + 1)->b_blocknr)
0246             return &MAX_KEY;
0247         /*
0248          * Return delimiting key if position in the parent
0249          * is not equal to zero.
0250          */
0251         if (position)
0252             return internal_key(parent, position - 1);
0253     }
0254     /* Return MIN_KEY if we are in the root of the buffer tree. */
0255     if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
0256         b_blocknr == SB_ROOT_BLOCK(sb))
0257         return &MIN_KEY;
0258     return &MAX_KEY;
0259 }
0260 
0261 /* Get delimiting key of the buffer at the path and its right neighbor. */
0262 inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
0263                        const struct super_block *sb)
0264 {
0265     int position, path_offset = chk_path->path_length;
0266     struct buffer_head *parent;
0267 
0268     RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
0269            "PAP-5030: invalid offset in the path");
0270 
0271     while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
0272 
0273         RFALSE(!buffer_uptodate
0274                (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
0275                "PAP-5040: parent is not uptodate");
0276 
0277         /* Parent at the path is not in the tree now. */
0278         if (!B_IS_IN_TREE
0279             (parent =
0280              PATH_OFFSET_PBUFFER(chk_path, path_offset)))
0281             return &MIN_KEY;
0282         /* Check whether position in the parent is correct. */
0283         if ((position =
0284              PATH_OFFSET_POSITION(chk_path,
0285                       path_offset)) >
0286             B_NR_ITEMS(parent))
0287             return &MIN_KEY;
0288         /*
0289          * Check whether parent at the path really points
0290          * to the child.
0291          */
0292         if (B_N_CHILD_NUM(parent, position) !=
0293             PATH_OFFSET_PBUFFER(chk_path,
0294                     path_offset + 1)->b_blocknr)
0295             return &MIN_KEY;
0296 
0297         /*
0298          * Return delimiting key if position in the parent
0299          * is not the last one.
0300          */
0301         if (position != B_NR_ITEMS(parent))
0302             return internal_key(parent, position);
0303     }
0304 
0305     /* Return MAX_KEY if we are in the root of the buffer tree. */
0306     if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
0307         b_blocknr == SB_ROOT_BLOCK(sb))
0308         return &MAX_KEY;
0309     return &MIN_KEY;
0310 }
0311 
0312 /*
0313  * Check whether a key is contained in the tree rooted from a buffer at a path.
0314  * This works by looking at the left and right delimiting keys for the buffer
0315  * in the last path_element in the path.  These delimiting keys are stored
0316  * at least one level above that buffer in the tree. If the buffer is the
0317  * first or last node in the tree order then one of the delimiting keys may
0318  * be absent, and in this case get_lkey and get_rkey return a special key
0319  * which is MIN_KEY or MAX_KEY.
0320  */
0321 static inline int key_in_buffer(
0322                 /* Path which should be checked. */
0323                 struct treepath *chk_path,
0324                 /* Key which should be checked. */
0325                 const struct cpu_key *key,
0326                 struct super_block *sb
0327     )
0328 {
0329 
0330     RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
0331            || chk_path->path_length > MAX_HEIGHT,
0332            "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
0333            key, chk_path->path_length);
0334     RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
0335            "PAP-5060: device must not be NODEV");
0336 
0337     if (comp_keys(get_lkey(chk_path, sb), key) == 1)
0338         /* left delimiting key is bigger, that the key we look for */
0339         return 0;
0340     /*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
0341     if (comp_keys(get_rkey(chk_path, sb), key) != 1)
0342         /* key must be less than right delimitiing key */
0343         return 0;
0344     return 1;
0345 }
0346 
0347 int reiserfs_check_path(struct treepath *p)
0348 {
0349     RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
0350            "path not properly relsed");
0351     return 0;
0352 }
0353 
0354 /*
0355  * Drop the reference to each buffer in a path and restore
0356  * dirty bits clean when preparing the buffer for the log.
0357  * This version should only be called from fix_nodes()
0358  */
0359 void pathrelse_and_restore(struct super_block *sb,
0360                struct treepath *search_path)
0361 {
0362     int path_offset = search_path->path_length;
0363 
0364     RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
0365            "clm-4000: invalid path offset");
0366 
0367     while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
0368         struct buffer_head *bh;
0369         bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
0370         reiserfs_restore_prepared_buffer(sb, bh);
0371         brelse(bh);
0372     }
0373     search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
0374 }
0375 
0376 /* Drop the reference to each buffer in a path */
0377 void pathrelse(struct treepath *search_path)
0378 {
0379     int path_offset = search_path->path_length;
0380 
0381     RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
0382            "PAP-5090: invalid path offset");
0383 
0384     while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
0385         brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
0386 
0387     search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
0388 }
0389 
0390 static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
0391 {
0392     struct reiserfs_de_head *deh;
0393     int i;
0394 
0395     deh = B_I_DEH(bh, ih);
0396     for (i = 0; i < ih_entry_count(ih); i++) {
0397         if (deh_location(&deh[i]) > ih_item_len(ih)) {
0398             reiserfs_warning(NULL, "reiserfs-5094",
0399                      "directory entry location seems wrong %h",
0400                      &deh[i]);
0401             return 0;
0402         }
0403     }
0404 
0405     return 1;
0406 }
0407 
0408 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
0409 {
0410     struct block_head *blkh;
0411     struct item_head *ih;
0412     int used_space;
0413     int prev_location;
0414     int i;
0415     int nr;
0416 
0417     blkh = (struct block_head *)buf;
0418     if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
0419         reiserfs_warning(NULL, "reiserfs-5080",
0420                  "this should be caught earlier");
0421         return 0;
0422     }
0423 
0424     nr = blkh_nr_item(blkh);
0425     if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
0426         /* item number is too big or too small */
0427         reiserfs_warning(NULL, "reiserfs-5081",
0428                  "nr_item seems wrong: %z", bh);
0429         return 0;
0430     }
0431     ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
0432     used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
0433 
0434     /* free space does not match to calculated amount of use space */
0435     if (used_space != blocksize - blkh_free_space(blkh)) {
0436         reiserfs_warning(NULL, "reiserfs-5082",
0437                  "free space seems wrong: %z", bh);
0438         return 0;
0439     }
0440     /*
0441      * FIXME: it is_leaf will hit performance too much - we may have
0442      * return 1 here
0443      */
0444 
0445     /* check tables of item heads */
0446     ih = (struct item_head *)(buf + BLKH_SIZE);
0447     prev_location = blocksize;
0448     for (i = 0; i < nr; i++, ih++) {
0449         if (le_ih_k_type(ih) == TYPE_ANY) {
0450             reiserfs_warning(NULL, "reiserfs-5083",
0451                      "wrong item type for item %h",
0452                      ih);
0453             return 0;
0454         }
0455         if (ih_location(ih) >= blocksize
0456             || ih_location(ih) < IH_SIZE * nr) {
0457             reiserfs_warning(NULL, "reiserfs-5084",
0458                      "item location seems wrong: %h",
0459                      ih);
0460             return 0;
0461         }
0462         if (ih_item_len(ih) < 1
0463             || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
0464             reiserfs_warning(NULL, "reiserfs-5085",
0465                      "item length seems wrong: %h",
0466                      ih);
0467             return 0;
0468         }
0469         if (prev_location - ih_location(ih) != ih_item_len(ih)) {
0470             reiserfs_warning(NULL, "reiserfs-5086",
0471                      "item location seems wrong "
0472                      "(second one): %h", ih);
0473             return 0;
0474         }
0475         if (is_direntry_le_ih(ih)) {
0476             if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
0477                 reiserfs_warning(NULL, "reiserfs-5093",
0478                          "item entry count seems wrong %h",
0479                          ih);
0480                 return 0;
0481             }
0482             return has_valid_deh_location(bh, ih);
0483         }
0484         prev_location = ih_location(ih);
0485     }
0486 
0487     /* one may imagine many more checks */
0488     return 1;
0489 }
0490 
0491 /* returns 1 if buf looks like an internal node, 0 otherwise */
0492 static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
0493 {
0494     struct block_head *blkh;
0495     int nr;
0496     int used_space;
0497 
0498     blkh = (struct block_head *)buf;
0499     nr = blkh_level(blkh);
0500     if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
0501         /* this level is not possible for internal nodes */
0502         reiserfs_warning(NULL, "reiserfs-5087",
0503                  "this should be caught earlier");
0504         return 0;
0505     }
0506 
0507     nr = blkh_nr_item(blkh);
0508     /* for internal which is not root we might check min number of keys */
0509     if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
0510         reiserfs_warning(NULL, "reiserfs-5088",
0511                  "number of key seems wrong: %z", bh);
0512         return 0;
0513     }
0514 
0515     used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
0516     if (used_space != blocksize - blkh_free_space(blkh)) {
0517         reiserfs_warning(NULL, "reiserfs-5089",
0518                  "free space seems wrong: %z", bh);
0519         return 0;
0520     }
0521 
0522     /* one may imagine many more checks */
0523     return 1;
0524 }
0525 
0526 /*
0527  * make sure that bh contains formatted node of reiserfs tree of
0528  * 'level'-th level
0529  */
0530 static int is_tree_node(struct buffer_head *bh, int level)
0531 {
0532     if (B_LEVEL(bh) != level) {
0533         reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
0534                  "not match to the expected one %d",
0535                  B_LEVEL(bh), level);
0536         return 0;
0537     }
0538     if (level == DISK_LEAF_NODE_LEVEL)
0539         return is_leaf(bh->b_data, bh->b_size, bh);
0540 
0541     return is_internal(bh->b_data, bh->b_size, bh);
0542 }
0543 
0544 #define SEARCH_BY_KEY_READA 16
0545 
0546 /*
0547  * The function is NOT SCHEDULE-SAFE!
0548  * It might unlock the write lock if we needed to wait for a block
0549  * to be read. Note that in this case it won't recover the lock to avoid
0550  * high contention resulting from too much lock requests, especially
0551  * the caller (search_by_key) will perform other schedule-unsafe
0552  * operations just after calling this function.
0553  *
0554  * @return depth of lock to be restored after read completes
0555  */
0556 static int search_by_key_reada(struct super_block *s,
0557                 struct buffer_head **bh,
0558                 b_blocknr_t *b, int num)
0559 {
0560     int i, j;
0561     int depth = -1;
0562 
0563     for (i = 0; i < num; i++) {
0564         bh[i] = sb_getblk(s, b[i]);
0565     }
0566     /*
0567      * We are going to read some blocks on which we
0568      * have a reference. It's safe, though we might be
0569      * reading blocks concurrently changed if we release
0570      * the lock. But it's still fine because we check later
0571      * if the tree changed
0572      */
0573     for (j = 0; j < i; j++) {
0574         /*
0575          * note, this needs attention if we are getting rid of the BKL
0576          * you have to make sure the prepared bit isn't set on this
0577          * buffer
0578          */
0579         if (!buffer_uptodate(bh[j])) {
0580             if (depth == -1)
0581                 depth = reiserfs_write_unlock_nested(s);
0582             ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, bh + j);
0583         }
0584         brelse(bh[j]);
0585     }
0586     return depth;
0587 }
0588 
0589 /*
0590  * This function fills up the path from the root to the leaf as it
0591  * descends the tree looking for the key.  It uses reiserfs_bread to
0592  * try to find buffers in the cache given their block number.  If it
0593  * does not find them in the cache it reads them from disk.  For each
0594  * node search_by_key finds using reiserfs_bread it then uses
0595  * bin_search to look through that node.  bin_search will find the
0596  * position of the block_number of the next node if it is looking
0597  * through an internal node.  If it is looking through a leaf node
0598  * bin_search will find the position of the item which has key either
0599  * equal to given key, or which is the maximal key less than the given
0600  * key.  search_by_key returns a path that must be checked for the
0601  * correctness of the top of the path but need not be checked for the
0602  * correctness of the bottom of the path
0603  */
0604 /*
0605  * search_by_key - search for key (and item) in stree
0606  * @sb: superblock
0607  * @key: pointer to key to search for
0608  * @search_path: Allocated and initialized struct treepath; Returned filled
0609  *       on success.
0610  * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
0611  *      stop at leaf level.
0612  *
0613  * The function is NOT SCHEDULE-SAFE!
0614  */
0615 int search_by_key(struct super_block *sb, const struct cpu_key *key,
0616           struct treepath *search_path, int stop_level)
0617 {
0618     b_blocknr_t block_number;
0619     int expected_level;
0620     struct buffer_head *bh;
0621     struct path_element *last_element;
0622     int node_level, retval;
0623     int fs_gen;
0624     struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
0625     b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
0626     int reada_count = 0;
0627 
0628 #ifdef CONFIG_REISERFS_CHECK
0629     int repeat_counter = 0;
0630 #endif
0631 
0632     PROC_INFO_INC(sb, search_by_key);
0633 
0634     /*
0635      * As we add each node to a path we increase its count.  This means
0636      * that we must be careful to release all nodes in a path before we
0637      * either discard the path struct or re-use the path struct, as we
0638      * do here.
0639      */
0640 
0641     pathrelse(search_path);
0642 
0643     /*
0644      * With each iteration of this loop we search through the items in the
0645      * current node, and calculate the next current node(next path element)
0646      * for the next iteration of this loop..
0647      */
0648     block_number = SB_ROOT_BLOCK(sb);
0649     expected_level = -1;
0650     while (1) {
0651 
0652 #ifdef CONFIG_REISERFS_CHECK
0653         if (!(++repeat_counter % 50000))
0654             reiserfs_warning(sb, "PAP-5100",
0655                      "%s: there were %d iterations of "
0656                      "while loop looking for key %K",
0657                      current->comm, repeat_counter,
0658                      key);
0659 #endif
0660 
0661         /* prep path to have another element added to it. */
0662         last_element =
0663             PATH_OFFSET_PELEMENT(search_path,
0664                      ++search_path->path_length);
0665         fs_gen = get_generation(sb);
0666 
0667         /*
0668          * Read the next tree node, and set the last element
0669          * in the path to have a pointer to it.
0670          */
0671         if ((bh = last_element->pe_buffer =
0672              sb_getblk(sb, block_number))) {
0673 
0674             /*
0675              * We'll need to drop the lock if we encounter any
0676              * buffers that need to be read. If all of them are
0677              * already up to date, we don't need to drop the lock.
0678              */
0679             int depth = -1;
0680 
0681             if (!buffer_uptodate(bh) && reada_count > 1)
0682                 depth = search_by_key_reada(sb, reada_bh,
0683                             reada_blocks, reada_count);
0684 
0685             if (!buffer_uptodate(bh) && depth == -1)
0686                 depth = reiserfs_write_unlock_nested(sb);
0687 
0688             ll_rw_block(REQ_OP_READ, 1, &bh);
0689             wait_on_buffer(bh);
0690 
0691             if (depth != -1)
0692                 reiserfs_write_lock_nested(sb, depth);
0693             if (!buffer_uptodate(bh))
0694                 goto io_error;
0695         } else {
0696 io_error:
0697             search_path->path_length--;
0698             pathrelse(search_path);
0699             return IO_ERROR;
0700         }
0701         reada_count = 0;
0702         if (expected_level == -1)
0703             expected_level = SB_TREE_HEIGHT(sb);
0704         expected_level--;
0705 
0706         /*
0707          * It is possible that schedule occurred. We must check
0708          * whether the key to search is still in the tree rooted
0709          * from the current buffer. If not then repeat search
0710          * from the root.
0711          */
0712         if (fs_changed(fs_gen, sb) &&
0713             (!B_IS_IN_TREE(bh) ||
0714              B_LEVEL(bh) != expected_level ||
0715              !key_in_buffer(search_path, key, sb))) {
0716             PROC_INFO_INC(sb, search_by_key_fs_changed);
0717             PROC_INFO_INC(sb, search_by_key_restarted);
0718             PROC_INFO_INC(sb,
0719                       sbk_restarted[expected_level - 1]);
0720             pathrelse(search_path);
0721 
0722             /*
0723              * Get the root block number so that we can
0724              * repeat the search starting from the root.
0725              */
0726             block_number = SB_ROOT_BLOCK(sb);
0727             expected_level = -1;
0728 
0729             /* repeat search from the root */
0730             continue;
0731         }
0732 
0733         /*
0734          * only check that the key is in the buffer if key is not
0735          * equal to the MAX_KEY. Latter case is only possible in
0736          * "finish_unfinished()" processing during mount.
0737          */
0738         RFALSE(comp_keys(&MAX_KEY, key) &&
0739                !key_in_buffer(search_path, key, sb),
0740                "PAP-5130: key is not in the buffer");
0741 #ifdef CONFIG_REISERFS_CHECK
0742         if (REISERFS_SB(sb)->cur_tb) {
0743             print_cur_tb("5140");
0744             reiserfs_panic(sb, "PAP-5140",
0745                        "schedule occurred in do_balance!");
0746         }
0747 #endif
0748 
0749         /*
0750          * make sure, that the node contents look like a node of
0751          * certain level
0752          */
0753         if (!is_tree_node(bh, expected_level)) {
0754             reiserfs_error(sb, "vs-5150",
0755                        "invalid format found in block %ld. "
0756                        "Fsck?", bh->b_blocknr);
0757             pathrelse(search_path);
0758             return IO_ERROR;
0759         }
0760 
0761         /* ok, we have acquired next formatted node in the tree */
0762         node_level = B_LEVEL(bh);
0763 
0764         PROC_INFO_BH_STAT(sb, bh, node_level - 1);
0765 
0766         RFALSE(node_level < stop_level,
0767                "vs-5152: tree level (%d) is less than stop level (%d)",
0768                node_level, stop_level);
0769 
0770         retval = bin_search(key, item_head(bh, 0),
0771                       B_NR_ITEMS(bh),
0772                       (node_level ==
0773                        DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
0774                       KEY_SIZE,
0775                       &last_element->pe_position);
0776         if (node_level == stop_level) {
0777             return retval;
0778         }
0779 
0780         /* we are not in the stop level */
0781         /*
0782          * item has been found, so we choose the pointer which
0783          * is to the right of the found one
0784          */
0785         if (retval == ITEM_FOUND)
0786             last_element->pe_position++;
0787 
0788         /*
0789          * if item was not found we choose the position which is to
0790          * the left of the found item. This requires no code,
0791          * bin_search did it already.
0792          */
0793 
0794         /*
0795          * So we have chosen a position in the current node which is
0796          * an internal node.  Now we calculate child block number by
0797          * position in the node.
0798          */
0799         block_number =
0800             B_N_CHILD_NUM(bh, last_element->pe_position);
0801 
0802         /*
0803          * if we are going to read leaf nodes, try for read
0804          * ahead as well
0805          */
0806         if ((search_path->reada & PATH_READA) &&
0807             node_level == DISK_LEAF_NODE_LEVEL + 1) {
0808             int pos = last_element->pe_position;
0809             int limit = B_NR_ITEMS(bh);
0810             struct reiserfs_key *le_key;
0811 
0812             if (search_path->reada & PATH_READA_BACK)
0813                 limit = 0;
0814             while (reada_count < SEARCH_BY_KEY_READA) {
0815                 if (pos == limit)
0816                     break;
0817                 reada_blocks[reada_count++] =
0818                     B_N_CHILD_NUM(bh, pos);
0819                 if (search_path->reada & PATH_READA_BACK)
0820                     pos--;
0821                 else
0822                     pos++;
0823 
0824                 /*
0825                  * check to make sure we're in the same object
0826                  */
0827                 le_key = internal_key(bh, pos);
0828                 if (le32_to_cpu(le_key->k_objectid) !=
0829                     key->on_disk_key.k_objectid) {
0830                     break;
0831                 }
0832             }
0833         }
0834     }
0835 }
0836 
0837 /*
0838  * Form the path to an item and position in this item which contains
0839  * file byte defined by key. If there is no such item
0840  * corresponding to the key, we point the path to the item with
0841  * maximal key less than key, and *pos_in_item is set to one
0842  * past the last entry/byte in the item.  If searching for entry in a
0843  * directory item, and it is not found, *pos_in_item is set to one
0844  * entry more than the entry with maximal key which is less than the
0845  * sought key.
0846  *
0847  * Note that if there is no entry in this same node which is one more,
0848  * then we point to an imaginary entry.  for direct items, the
0849  * position is in units of bytes, for indirect items the position is
0850  * in units of blocknr entries, for directory items the position is in
0851  * units of directory entries.
0852  */
0853 /* The function is NOT SCHEDULE-SAFE! */
0854 int search_for_position_by_key(struct super_block *sb,
0855                    /* Key to search (cpu variable) */
0856                    const struct cpu_key *p_cpu_key,
0857                    /* Filled up by this function. */
0858                    struct treepath *search_path)
0859 {
0860     struct item_head *p_le_ih;  /* pointer to on-disk structure */
0861     int blk_size;
0862     loff_t item_offset, offset;
0863     struct reiserfs_dir_entry de;
0864     int retval;
0865 
0866     /* If searching for directory entry. */
0867     if (is_direntry_cpu_key(p_cpu_key))
0868         return search_by_entry_key(sb, p_cpu_key, search_path,
0869                        &de);
0870 
0871     /* If not searching for directory entry. */
0872 
0873     /* If item is found. */
0874     retval = search_item(sb, p_cpu_key, search_path);
0875     if (retval == IO_ERROR)
0876         return retval;
0877     if (retval == ITEM_FOUND) {
0878 
0879         RFALSE(!ih_item_len
0880                (item_head
0881             (PATH_PLAST_BUFFER(search_path),
0882              PATH_LAST_POSITION(search_path))),
0883                "PAP-5165: item length equals zero");
0884 
0885         pos_in_item(search_path) = 0;
0886         return POSITION_FOUND;
0887     }
0888 
0889     RFALSE(!PATH_LAST_POSITION(search_path),
0890            "PAP-5170: position equals zero");
0891 
0892     /* Item is not found. Set path to the previous item. */
0893     p_le_ih =
0894         item_head(PATH_PLAST_BUFFER(search_path),
0895                --PATH_LAST_POSITION(search_path));
0896     blk_size = sb->s_blocksize;
0897 
0898     if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
0899         return FILE_NOT_FOUND;
0900 
0901     /* FIXME: quite ugly this far */
0902 
0903     item_offset = le_ih_k_offset(p_le_ih);
0904     offset = cpu_key_k_offset(p_cpu_key);
0905 
0906     /* Needed byte is contained in the item pointed to by the path. */
0907     if (item_offset <= offset &&
0908         item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
0909         pos_in_item(search_path) = offset - item_offset;
0910         if (is_indirect_le_ih(p_le_ih)) {
0911             pos_in_item(search_path) /= blk_size;
0912         }
0913         return POSITION_FOUND;
0914     }
0915 
0916     /*
0917      * Needed byte is not contained in the item pointed to by the
0918      * path. Set pos_in_item out of the item.
0919      */
0920     if (is_indirect_le_ih(p_le_ih))
0921         pos_in_item(search_path) =
0922             ih_item_len(p_le_ih) / UNFM_P_SIZE;
0923     else
0924         pos_in_item(search_path) = ih_item_len(p_le_ih);
0925 
0926     return POSITION_NOT_FOUND;
0927 }
0928 
0929 /* Compare given item and item pointed to by the path. */
0930 int comp_items(const struct item_head *stored_ih, const struct treepath *path)
0931 {
0932     struct buffer_head *bh = PATH_PLAST_BUFFER(path);
0933     struct item_head *ih;
0934 
0935     /* Last buffer at the path is not in the tree. */
0936     if (!B_IS_IN_TREE(bh))
0937         return 1;
0938 
0939     /* Last path position is invalid. */
0940     if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
0941         return 1;
0942 
0943     /* we need only to know, whether it is the same item */
0944     ih = tp_item_head(path);
0945     return memcmp(stored_ih, ih, IH_SIZE);
0946 }
0947 
0948 /* prepare for delete or cut of direct item */
0949 static inline int prepare_for_direct_item(struct treepath *path,
0950                       struct item_head *le_ih,
0951                       struct inode *inode,
0952                       loff_t new_file_length, int *cut_size)
0953 {
0954     loff_t round_len;
0955 
0956     if (new_file_length == max_reiserfs_offset(inode)) {
0957         /* item has to be deleted */
0958         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
0959         return M_DELETE;
0960     }
0961     /* new file gets truncated */
0962     if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
0963         round_len = ROUND_UP(new_file_length);
0964         /* this was new_file_length < le_ih ... */
0965         if (round_len < le_ih_k_offset(le_ih)) {
0966             *cut_size = -(IH_SIZE + ih_item_len(le_ih));
0967             return M_DELETE;    /* Delete this item. */
0968         }
0969         /* Calculate first position and size for cutting from item. */
0970         pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
0971         *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
0972 
0973         return M_CUT;   /* Cut from this item. */
0974     }
0975 
0976     /* old file: items may have any length */
0977 
0978     if (new_file_length < le_ih_k_offset(le_ih)) {
0979         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
0980         return M_DELETE;    /* Delete this item. */
0981     }
0982 
0983     /* Calculate first position and size for cutting from item. */
0984     *cut_size = -(ih_item_len(le_ih) -
0985               (pos_in_item(path) =
0986                new_file_length + 1 - le_ih_k_offset(le_ih)));
0987     return M_CUT;       /* Cut from this item. */
0988 }
0989 
0990 static inline int prepare_for_direntry_item(struct treepath *path,
0991                         struct item_head *le_ih,
0992                         struct inode *inode,
0993                         loff_t new_file_length,
0994                         int *cut_size)
0995 {
0996     if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
0997         new_file_length == max_reiserfs_offset(inode)) {
0998         RFALSE(ih_entry_count(le_ih) != 2,
0999                "PAP-5220: incorrect empty directory item (%h)", le_ih);
1000         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
1001         /* Delete the directory item containing "." and ".." entry. */
1002         return M_DELETE;
1003     }
1004 
1005     if (ih_entry_count(le_ih) == 1) {
1006         /*
1007          * Delete the directory item such as there is one record only
1008          * in this item
1009          */
1010         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
1011         return M_DELETE;
1012     }
1013 
1014     /* Cut one record from the directory item. */
1015     *cut_size =
1016         -(DEH_SIZE +
1017           entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
1018     return M_CUT;
1019 }
1020 
1021 #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
1022 
1023 /*
1024  * If the path points to a directory or direct item, calculate mode
1025  * and the size cut, for balance.
1026  * If the path points to an indirect item, remove some number of its
1027  * unformatted nodes.
1028  * In case of file truncate calculate whether this item must be
1029  * deleted/truncated or last unformatted node of this item will be
1030  * converted to a direct item.
1031  * This function returns a determination of what balance mode the
1032  * calling function should employ.
1033  */
1034 static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
1035                       struct inode *inode,
1036                       struct treepath *path,
1037                       const struct cpu_key *item_key,
1038                       /*
1039                        * Number of unformatted nodes
1040                        * which were removed from end
1041                        * of the file.
1042                        */
1043                       int *removed,
1044                       int *cut_size,
1045                       /* MAX_KEY_OFFSET in case of delete. */
1046                       unsigned long long new_file_length
1047     )
1048 {
1049     struct super_block *sb = inode->i_sb;
1050     struct item_head *p_le_ih = tp_item_head(path);
1051     struct buffer_head *bh = PATH_PLAST_BUFFER(path);
1052 
1053     BUG_ON(!th->t_trans_id);
1054 
1055     /* Stat_data item. */
1056     if (is_statdata_le_ih(p_le_ih)) {
1057 
1058         RFALSE(new_file_length != max_reiserfs_offset(inode),
1059                "PAP-5210: mode must be M_DELETE");
1060 
1061         *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1062         return M_DELETE;
1063     }
1064 
1065     /* Directory item. */
1066     if (is_direntry_le_ih(p_le_ih))
1067         return prepare_for_direntry_item(path, p_le_ih, inode,
1068                          new_file_length,
1069                          cut_size);
1070 
1071     /* Direct item. */
1072     if (is_direct_le_ih(p_le_ih))
1073         return prepare_for_direct_item(path, p_le_ih, inode,
1074                            new_file_length, cut_size);
1075 
1076     /* Case of an indirect item. */
1077     {
1078         int blk_size = sb->s_blocksize;
1079         struct item_head s_ih;
1080         int need_re_search;
1081         int delete = 0;
1082         int result = M_CUT;
1083         int pos = 0;
1084 
1085         if ( new_file_length == max_reiserfs_offset (inode) ) {
1086         /*
1087          * prepare_for_delete_or_cut() is called by
1088          * reiserfs_delete_item()
1089          */
1090         new_file_length = 0;
1091         delete = 1;
1092         }
1093 
1094         do {
1095         need_re_search = 0;
1096         *cut_size = 0;
1097         bh = PATH_PLAST_BUFFER(path);
1098         copy_item_head(&s_ih, tp_item_head(path));
1099         pos = I_UNFM_NUM(&s_ih);
1100 
1101         while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1102             __le32 *unfm;
1103             __u32 block;
1104 
1105             /*
1106              * Each unformatted block deletion may involve
1107              * one additional bitmap block into the transaction,
1108              * thereby the initial journal space reservation
1109              * might not be enough.
1110              */
1111             if (!delete && (*cut_size) != 0 &&
1112             reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1113             break;
1114 
1115             unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
1116             block = get_block_num(unfm, 0);
1117 
1118             if (block != 0) {
1119             reiserfs_prepare_for_journal(sb, bh, 1);
1120             put_block_num(unfm, 0, 0);
1121             journal_mark_dirty(th, bh);
1122             reiserfs_free_block(th, inode, block, 1);
1123             }
1124 
1125             reiserfs_cond_resched(sb);
1126 
1127             if (item_moved (&s_ih, path))  {
1128             need_re_search = 1;
1129             break;
1130             }
1131 
1132             pos --;
1133             (*removed)++;
1134             (*cut_size) -= UNFM_P_SIZE;
1135 
1136             if (pos == 0) {
1137             (*cut_size) -= IH_SIZE;
1138             result = M_DELETE;
1139             break;
1140             }
1141         }
1142         /*
1143          * a trick.  If the buffer has been logged, this will
1144          * do nothing.  If we've broken the loop without logging
1145          * it, it will restore the buffer
1146          */
1147         reiserfs_restore_prepared_buffer(sb, bh);
1148         } while (need_re_search &&
1149              search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1150         pos_in_item(path) = pos * UNFM_P_SIZE;
1151 
1152         if (*cut_size == 0) {
1153         /*
1154          * Nothing was cut. maybe convert last unformatted node to the
1155          * direct item?
1156          */
1157         result = M_CONVERT;
1158         }
1159         return result;
1160     }
1161 }
1162 
1163 /* Calculate number of bytes which will be deleted or cut during balance */
1164 static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1165 {
1166     int del_size;
1167     struct item_head *p_le_ih = tp_item_head(tb->tb_path);
1168 
1169     if (is_statdata_le_ih(p_le_ih))
1170         return 0;
1171 
1172     del_size =
1173         (mode ==
1174          M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1175     if (is_direntry_le_ih(p_le_ih)) {
1176         /*
1177          * return EMPTY_DIR_SIZE; We delete emty directories only.
1178          * we can't use EMPTY_DIR_SIZE, as old format dirs have a
1179          * different empty size.  ick. FIXME, is this right?
1180          */
1181         return del_size;
1182     }
1183 
1184     if (is_indirect_le_ih(p_le_ih))
1185         del_size = (del_size / UNFM_P_SIZE) *
1186                 (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1187     return del_size;
1188 }
1189 
1190 static void init_tb_struct(struct reiserfs_transaction_handle *th,
1191                struct tree_balance *tb,
1192                struct super_block *sb,
1193                struct treepath *path, int size)
1194 {
1195 
1196     BUG_ON(!th->t_trans_id);
1197 
1198     memset(tb, '\0', sizeof(struct tree_balance));
1199     tb->transaction_handle = th;
1200     tb->tb_sb = sb;
1201     tb->tb_path = path;
1202     PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1203     PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1204     tb->insert_size[0] = size;
1205 }
1206 
1207 void padd_item(char *item, int total_length, int length)
1208 {
1209     int i;
1210 
1211     for (i = total_length; i > length;)
1212         item[--i] = 0;
1213 }
1214 
1215 #ifdef REISERQUOTA_DEBUG
1216 char key2type(struct reiserfs_key *ih)
1217 {
1218     if (is_direntry_le_key(2, ih))
1219         return 'd';
1220     if (is_direct_le_key(2, ih))
1221         return 'D';
1222     if (is_indirect_le_key(2, ih))
1223         return 'i';
1224     if (is_statdata_le_key(2, ih))
1225         return 's';
1226     return 'u';
1227 }
1228 
1229 char head2type(struct item_head *ih)
1230 {
1231     if (is_direntry_le_ih(ih))
1232         return 'd';
1233     if (is_direct_le_ih(ih))
1234         return 'D';
1235     if (is_indirect_le_ih(ih))
1236         return 'i';
1237     if (is_statdata_le_ih(ih))
1238         return 's';
1239     return 'u';
1240 }
1241 #endif
1242 
1243 /*
1244  * Delete object item.
1245  * th       - active transaction handle
1246  * path     - path to the deleted item
1247  * item_key - key to search for the deleted item
1248  * indode   - used for updating i_blocks and quotas
1249  * un_bh    - NULL or unformatted node pointer
1250  */
1251 int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1252              struct treepath *path, const struct cpu_key *item_key,
1253              struct inode *inode, struct buffer_head *un_bh)
1254 {
1255     struct super_block *sb = inode->i_sb;
1256     struct tree_balance s_del_balance;
1257     struct item_head s_ih;
1258     struct item_head *q_ih;
1259     int quota_cut_bytes;
1260     int ret_value, del_size, removed;
1261     int depth;
1262 
1263 #ifdef CONFIG_REISERFS_CHECK
1264     char mode;
1265     int iter = 0;
1266 #endif
1267 
1268     BUG_ON(!th->t_trans_id);
1269 
1270     init_tb_struct(th, &s_del_balance, sb, path,
1271                0 /*size is unknown */ );
1272 
1273     while (1) {
1274         removed = 0;
1275 
1276 #ifdef CONFIG_REISERFS_CHECK
1277         iter++;
1278         mode =
1279 #endif
1280             prepare_for_delete_or_cut(th, inode, path,
1281                           item_key, &removed,
1282                           &del_size,
1283                           max_reiserfs_offset(inode));
1284 
1285         RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1286 
1287         copy_item_head(&s_ih, tp_item_head(path));
1288         s_del_balance.insert_size[0] = del_size;
1289 
1290         ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1291         if (ret_value != REPEAT_SEARCH)
1292             break;
1293 
1294         PROC_INFO_INC(sb, delete_item_restarted);
1295 
1296         /* file system changed, repeat search */
1297         ret_value =
1298             search_for_position_by_key(sb, item_key, path);
1299         if (ret_value == IO_ERROR)
1300             break;
1301         if (ret_value == FILE_NOT_FOUND) {
1302             reiserfs_warning(sb, "vs-5340",
1303                      "no items of the file %K found",
1304                      item_key);
1305             break;
1306         }
1307     }           /* while (1) */
1308 
1309     if (ret_value != CARRY_ON) {
1310         unfix_nodes(&s_del_balance);
1311         return 0;
1312     }
1313 
1314     /* reiserfs_delete_item returns item length when success */
1315     ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1316     q_ih = tp_item_head(path);
1317     quota_cut_bytes = ih_item_len(q_ih);
1318 
1319     /*
1320      * hack so the quota code doesn't have to guess if the file has a
1321      * tail.  On tail insert, we allocate quota for 1 unformatted node.
1322      * We test the offset because the tail might have been
1323      * split into multiple items, and we only want to decrement for
1324      * the unfm node once
1325      */
1326     if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1327         if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1328             quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1329         } else {
1330             quota_cut_bytes = 0;
1331         }
1332     }
1333 
1334     if (un_bh) {
1335         int off;
1336         char *data;
1337 
1338         /*
1339          * We are in direct2indirect conversion, so move tail contents
1340          * to the unformatted node
1341          */
1342         /*
1343          * note, we do the copy before preparing the buffer because we
1344          * don't care about the contents of the unformatted node yet.
1345          * the only thing we really care about is the direct item's
1346          * data is in the unformatted node.
1347          *
1348          * Otherwise, we would have to call
1349          * reiserfs_prepare_for_journal on the unformatted node,
1350          * which might schedule, meaning we'd have to loop all the
1351          * way back up to the start of the while loop.
1352          *
1353          * The unformatted node must be dirtied later on.  We can't be
1354          * sure here if the entire tail has been deleted yet.
1355          *
1356          * un_bh is from the page cache (all unformatted nodes are
1357          * from the page cache) and might be a highmem page.  So, we
1358          * can't use un_bh->b_data.
1359          * -clm
1360          */
1361 
1362         data = kmap_atomic(un_bh->b_page);
1363         off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
1364         memcpy(data + off,
1365                ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
1366                ret_value);
1367         kunmap_atomic(data);
1368     }
1369 
1370     /* Perform balancing after all resources have been collected at once. */
1371     do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1372 
1373 #ifdef REISERQUOTA_DEBUG
1374     reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1375                "reiserquota delete_item(): freeing %u, id=%u type=%c",
1376                quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1377 #endif
1378     depth = reiserfs_write_unlock_nested(inode->i_sb);
1379     dquot_free_space_nodirty(inode, quota_cut_bytes);
1380     reiserfs_write_lock_nested(inode->i_sb, depth);
1381 
1382     /* Return deleted body length */
1383     return ret_value;
1384 }
1385 
1386 /*
1387  * Summary Of Mechanisms For Handling Collisions Between Processes:
1388  *
1389  *  deletion of the body of the object is performed by iput(), with the
1390  *  result that if multiple processes are operating on a file, the
1391  *  deletion of the body of the file is deferred until the last process
1392  *  that has an open inode performs its iput().
1393  *
1394  *  writes and truncates are protected from collisions by use of
1395  *  semaphores.
1396  *
1397  *  creates, linking, and mknod are protected from collisions with other
1398  *  processes by making the reiserfs_add_entry() the last step in the
1399  *  creation, and then rolling back all changes if there was a collision.
1400  *  - Hans
1401 */
1402 
1403 /* this deletes item which never gets split */
1404 void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1405                 struct inode *inode, struct reiserfs_key *key)
1406 {
1407     struct super_block *sb = th->t_super;
1408     struct tree_balance tb;
1409     INITIALIZE_PATH(path);
1410     int item_len = 0;
1411     int tb_init = 0;
1412     struct cpu_key cpu_key;
1413     int retval;
1414     int quota_cut_bytes = 0;
1415 
1416     BUG_ON(!th->t_trans_id);
1417 
1418     le_key2cpu_key(&cpu_key, key);
1419 
1420     while (1) {
1421         retval = search_item(th->t_super, &cpu_key, &path);
1422         if (retval == IO_ERROR) {
1423             reiserfs_error(th->t_super, "vs-5350",
1424                        "i/o failure occurred trying "
1425                        "to delete %K", &cpu_key);
1426             break;
1427         }
1428         if (retval != ITEM_FOUND) {
1429             pathrelse(&path);
1430             /*
1431              * No need for a warning, if there is just no free
1432              * space to insert '..' item into the
1433              * newly-created subdir
1434              */
1435             if (!
1436                 ((unsigned long long)
1437                  GET_HASH_VALUE(le_key_k_offset
1438                         (le_key_version(key), key)) == 0
1439                  && (unsigned long long)
1440                  GET_GENERATION_NUMBER(le_key_k_offset
1441                            (le_key_version(key),
1442                             key)) == 1))
1443                 reiserfs_warning(th->t_super, "vs-5355",
1444                          "%k not found", key);
1445             break;
1446         }
1447         if (!tb_init) {
1448             tb_init = 1;
1449             item_len = ih_item_len(tp_item_head(&path));
1450             init_tb_struct(th, &tb, th->t_super, &path,
1451                        -(IH_SIZE + item_len));
1452         }
1453         quota_cut_bytes = ih_item_len(tp_item_head(&path));
1454 
1455         retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1456         if (retval == REPEAT_SEARCH) {
1457             PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1458             continue;
1459         }
1460 
1461         if (retval == CARRY_ON) {
1462             do_balance(&tb, NULL, NULL, M_DELETE);
1463             /*
1464              * Should we count quota for item? (we don't
1465              * count quotas for save-links)
1466              */
1467             if (inode) {
1468                 int depth;
1469 #ifdef REISERQUOTA_DEBUG
1470                 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1471                            "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1472                            quota_cut_bytes, inode->i_uid,
1473                            key2type(key));
1474 #endif
1475                 depth = reiserfs_write_unlock_nested(sb);
1476                 dquot_free_space_nodirty(inode,
1477                              quota_cut_bytes);
1478                 reiserfs_write_lock_nested(sb, depth);
1479             }
1480             break;
1481         }
1482 
1483         /* IO_ERROR, NO_DISK_SPACE, etc */
1484         reiserfs_warning(th->t_super, "vs-5360",
1485                  "could not delete %K due to fix_nodes failure",
1486                  &cpu_key);
1487         unfix_nodes(&tb);
1488         break;
1489     }
1490 
1491     reiserfs_check_path(&path);
1492 }
1493 
1494 int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1495                struct inode *inode)
1496 {
1497     int err;
1498     inode->i_size = 0;
1499     BUG_ON(!th->t_trans_id);
1500 
1501     /* for directory this deletes item containing "." and ".." */
1502     err =
1503         reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1504     if (err)
1505         return err;
1506 
1507 #if defined( USE_INODE_GENERATION_COUNTER )
1508     if (!old_format_only(th->t_super)) {
1509         __le32 *inode_generation;
1510 
1511         inode_generation =
1512             &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1513         le32_add_cpu(inode_generation, 1);
1514     }
1515 /* USE_INODE_GENERATION_COUNTER */
1516 #endif
1517     reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1518 
1519     return err;
1520 }
1521 
1522 static void unmap_buffers(struct page *page, loff_t pos)
1523 {
1524     struct buffer_head *bh;
1525     struct buffer_head *head;
1526     struct buffer_head *next;
1527     unsigned long tail_index;
1528     unsigned long cur_index;
1529 
1530     if (page) {
1531         if (page_has_buffers(page)) {
1532             tail_index = pos & (PAGE_SIZE - 1);
1533             cur_index = 0;
1534             head = page_buffers(page);
1535             bh = head;
1536             do {
1537                 next = bh->b_this_page;
1538 
1539                 /*
1540                  * we want to unmap the buffers that contain
1541                  * the tail, and all the buffers after it
1542                  * (since the tail must be at the end of the
1543                  * file).  We don't want to unmap file data
1544                  * before the tail, since it might be dirty
1545                  * and waiting to reach disk
1546                  */
1547                 cur_index += bh->b_size;
1548                 if (cur_index > tail_index) {
1549                     reiserfs_unmap_buffer(bh);
1550                 }
1551                 bh = next;
1552             } while (bh != head);
1553         }
1554     }
1555 }
1556 
1557 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1558                     struct inode *inode,
1559                     struct page *page,
1560                     struct treepath *path,
1561                     const struct cpu_key *item_key,
1562                     loff_t new_file_size, char *mode)
1563 {
1564     struct super_block *sb = inode->i_sb;
1565     int block_size = sb->s_blocksize;
1566     int cut_bytes;
1567     BUG_ON(!th->t_trans_id);
1568     BUG_ON(new_file_size != inode->i_size);
1569 
1570     /*
1571      * the page being sent in could be NULL if there was an i/o error
1572      * reading in the last block.  The user will hit problems trying to
1573      * read the file, but for now we just skip the indirect2direct
1574      */
1575     if (atomic_read(&inode->i_count) > 1 ||
1576         !tail_has_to_be_packed(inode) ||
1577         !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1578         /* leave tail in an unformatted node */
1579         *mode = M_SKIP_BALANCING;
1580         cut_bytes =
1581             block_size - (new_file_size & (block_size - 1));
1582         pathrelse(path);
1583         return cut_bytes;
1584     }
1585 
1586     /* Perform the conversion to a direct_item. */
1587     return indirect2direct(th, inode, page, path, item_key,
1588                    new_file_size, mode);
1589 }
1590 
1591 /*
1592  * we did indirect_to_direct conversion. And we have inserted direct
1593  * item successesfully, but there were no disk space to cut unfm
1594  * pointer being converted. Therefore we have to delete inserted
1595  * direct item(s)
1596  */
1597 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1598                      struct inode *inode, struct treepath *path)
1599 {
1600     struct cpu_key tail_key;
1601     int tail_len;
1602     int removed;
1603     BUG_ON(!th->t_trans_id);
1604 
1605     make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
1606     tail_key.key_length = 4;
1607 
1608     tail_len =
1609         (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1610     while (tail_len) {
1611         /* look for the last byte of the tail */
1612         if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1613             POSITION_NOT_FOUND)
1614             reiserfs_panic(inode->i_sb, "vs-5615",
1615                        "found invalid item");
1616         RFALSE(path->pos_in_item !=
1617                ih_item_len(tp_item_head(path)) - 1,
1618                "vs-5616: appended bytes found");
1619         PATH_LAST_POSITION(path)--;
1620 
1621         removed =
1622             reiserfs_delete_item(th, path, &tail_key, inode,
1623                      NULL /*unbh not needed */ );
1624         RFALSE(removed <= 0
1625                || removed > tail_len,
1626                "vs-5617: there was tail %d bytes, removed item length %d bytes",
1627                tail_len, removed);
1628         tail_len -= removed;
1629         set_cpu_key_k_offset(&tail_key,
1630                      cpu_key_k_offset(&tail_key) - removed);
1631     }
1632     reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1633              "conversion has been rolled back due to "
1634              "lack of disk space");
1635     mark_inode_dirty(inode);
1636 }
1637 
1638 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1639 int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1640                struct treepath *path,
1641                struct cpu_key *item_key,
1642                struct inode *inode,
1643                struct page *page, loff_t new_file_size)
1644 {
1645     struct super_block *sb = inode->i_sb;
1646     /*
1647      * Every function which is going to call do_balance must first
1648      * create a tree_balance structure.  Then it must fill up this
1649      * structure by using the init_tb_struct and fix_nodes functions.
1650      * After that we can make tree balancing.
1651      */
1652     struct tree_balance s_cut_balance;
1653     struct item_head *p_le_ih;
1654     int cut_size = 0;   /* Amount to be cut. */
1655     int ret_value = CARRY_ON;
1656     int removed = 0;    /* Number of the removed unformatted nodes. */
1657     int is_inode_locked = 0;
1658     char mode;      /* Mode of the balance. */
1659     int retval2 = -1;
1660     int quota_cut_bytes;
1661     loff_t tail_pos = 0;
1662     int depth;
1663 
1664     BUG_ON(!th->t_trans_id);
1665 
1666     init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1667                cut_size);
1668 
1669     /*
1670      * Repeat this loop until we either cut the item without needing
1671      * to balance, or we fix_nodes without schedule occurring
1672      */
1673     while (1) {
1674         /*
1675          * Determine the balance mode, position of the first byte to
1676          * be cut, and size to be cut.  In case of the indirect item
1677          * free unformatted nodes which are pointed to by the cut
1678          * pointers.
1679          */
1680 
1681         mode =
1682             prepare_for_delete_or_cut(th, inode, path,
1683                           item_key, &removed,
1684                           &cut_size, new_file_size);
1685         if (mode == M_CONVERT) {
1686             /*
1687              * convert last unformatted node to direct item or
1688              * leave tail in the unformatted node
1689              */
1690             RFALSE(ret_value != CARRY_ON,
1691                    "PAP-5570: can not convert twice");
1692 
1693             ret_value =
1694                 maybe_indirect_to_direct(th, inode, page,
1695                              path, item_key,
1696                              new_file_size, &mode);
1697             if (mode == M_SKIP_BALANCING)
1698                 /* tail has been left in the unformatted node */
1699                 return ret_value;
1700 
1701             is_inode_locked = 1;
1702 
1703             /*
1704              * removing of last unformatted node will
1705              * change value we have to return to truncate.
1706              * Save it
1707              */
1708             retval2 = ret_value;
1709 
1710             /*
1711              * So, we have performed the first part of the
1712              * conversion:
1713              * inserting the new direct item.  Now we are
1714              * removing the last unformatted node pointer.
1715              * Set key to search for it.
1716              */
1717             set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1718             item_key->key_length = 4;
1719             new_file_size -=
1720                 (new_file_size & (sb->s_blocksize - 1));
1721             tail_pos = new_file_size;
1722             set_cpu_key_k_offset(item_key, new_file_size + 1);
1723             if (search_for_position_by_key
1724                 (sb, item_key,
1725                  path) == POSITION_NOT_FOUND) {
1726                 print_block(PATH_PLAST_BUFFER(path), 3,
1727                         PATH_LAST_POSITION(path) - 1,
1728                         PATH_LAST_POSITION(path) + 1);
1729                 reiserfs_panic(sb, "PAP-5580", "item to "
1730                            "convert does not exist (%K)",
1731                            item_key);
1732             }
1733             continue;
1734         }
1735         if (cut_size == 0) {
1736             pathrelse(path);
1737             return 0;
1738         }
1739 
1740         s_cut_balance.insert_size[0] = cut_size;
1741 
1742         ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1743         if (ret_value != REPEAT_SEARCH)
1744             break;
1745 
1746         PROC_INFO_INC(sb, cut_from_item_restarted);
1747 
1748         ret_value =
1749             search_for_position_by_key(sb, item_key, path);
1750         if (ret_value == POSITION_FOUND)
1751             continue;
1752 
1753         reiserfs_warning(sb, "PAP-5610", "item %K not found",
1754                  item_key);
1755         unfix_nodes(&s_cut_balance);
1756         return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1757     }           /* while */
1758 
1759     /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
1760     if (ret_value != CARRY_ON) {
1761         if (is_inode_locked) {
1762             /*
1763              * FIXME: this seems to be not needed: we are always
1764              * able to cut item
1765              */
1766             indirect_to_direct_roll_back(th, inode, path);
1767         }
1768         if (ret_value == NO_DISK_SPACE)
1769             reiserfs_warning(sb, "reiserfs-5092",
1770                      "NO_DISK_SPACE");
1771         unfix_nodes(&s_cut_balance);
1772         return -EIO;
1773     }
1774 
1775     /* go ahead and perform balancing */
1776 
1777     RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1778 
1779     /* Calculate number of bytes that need to be cut from the item. */
1780     quota_cut_bytes =
1781         (mode ==
1782          M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
1783         insert_size[0];
1784     if (retval2 == -1)
1785         ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1786     else
1787         ret_value = retval2;
1788 
1789     /*
1790      * For direct items, we only change the quota when deleting the last
1791      * item.
1792      */
1793     p_le_ih = tp_item_head(s_cut_balance.tb_path);
1794     if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1795         if (mode == M_DELETE &&
1796             (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1797             1) {
1798             /* FIXME: this is to keep 3.5 happy */
1799             REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1800             quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1801         } else {
1802             quota_cut_bytes = 0;
1803         }
1804     }
1805 #ifdef CONFIG_REISERFS_CHECK
1806     if (is_inode_locked) {
1807         struct item_head *le_ih =
1808             tp_item_head(s_cut_balance.tb_path);
1809         /*
1810          * we are going to complete indirect2direct conversion. Make
1811          * sure, that we exactly remove last unformatted node pointer
1812          * of the item
1813          */
1814         if (!is_indirect_le_ih(le_ih))
1815             reiserfs_panic(sb, "vs-5652",
1816                        "item must be indirect %h", le_ih);
1817 
1818         if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1819             reiserfs_panic(sb, "vs-5653", "completing "
1820                        "indirect2direct conversion indirect "
1821                        "item %h being deleted must be of "
1822                        "4 byte long", le_ih);
1823 
1824         if (mode == M_CUT
1825             && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1826             reiserfs_panic(sb, "vs-5654", "can not complete "
1827                        "indirect2direct conversion of %h "
1828                        "(CUT, insert_size==%d)",
1829                        le_ih, s_cut_balance.insert_size[0]);
1830         }
1831         /*
1832          * it would be useful to make sure, that right neighboring
1833          * item is direct item of this file
1834          */
1835     }
1836 #endif
1837 
1838     do_balance(&s_cut_balance, NULL, NULL, mode);
1839     if (is_inode_locked) {
1840         /*
1841          * we've done an indirect->direct conversion.  when the
1842          * data block was freed, it was removed from the list of
1843          * blocks that must be flushed before the transaction
1844          * commits, make sure to unmap and invalidate it
1845          */
1846         unmap_buffers(page, tail_pos);
1847         REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1848     }
1849 #ifdef REISERQUOTA_DEBUG
1850     reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1851                "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1852                quota_cut_bytes, inode->i_uid, '?');
1853 #endif
1854     depth = reiserfs_write_unlock_nested(sb);
1855     dquot_free_space_nodirty(inode, quota_cut_bytes);
1856     reiserfs_write_lock_nested(sb, depth);
1857     return ret_value;
1858 }
1859 
1860 static void truncate_directory(struct reiserfs_transaction_handle *th,
1861                    struct inode *inode)
1862 {
1863     BUG_ON(!th->t_trans_id);
1864     if (inode->i_nlink)
1865         reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1866 
1867     set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1868     set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1869     reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1870     reiserfs_update_sd(th, inode);
1871     set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1872     set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1873 }
1874 
1875 /*
1876  * Truncate file to the new size. Note, this must be called with a
1877  * transaction already started
1878  */
1879 int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1880              struct inode *inode,   /* ->i_size contains new size */
1881              struct page *page, /* up to date for last block */
1882              /*
1883               * when it is called by file_release to convert
1884               * the tail - no timestamps should be updated
1885               */
1886              int update_timestamps
1887     )
1888 {
1889     INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1890     struct item_head *p_le_ih;  /* Pointer to an item header. */
1891 
1892     /* Key to search for a previous file item. */
1893     struct cpu_key s_item_key;
1894     loff_t file_size,   /* Old file size. */
1895      new_file_size; /* New file size. */
1896     int deleted;        /* Number of deleted or truncated bytes. */
1897     int retval;
1898     int err = 0;
1899 
1900     BUG_ON(!th->t_trans_id);
1901     if (!
1902         (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1903          || S_ISLNK(inode->i_mode)))
1904         return 0;
1905 
1906     /* deletion of directory - no need to update timestamps */
1907     if (S_ISDIR(inode->i_mode)) {
1908         truncate_directory(th, inode);
1909         return 0;
1910     }
1911 
1912     /* Get new file size. */
1913     new_file_size = inode->i_size;
1914 
1915     /* FIXME: note, that key type is unimportant here */
1916     make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1917              TYPE_DIRECT, 3);
1918 
1919     retval =
1920         search_for_position_by_key(inode->i_sb, &s_item_key,
1921                        &s_search_path);
1922     if (retval == IO_ERROR) {
1923         reiserfs_error(inode->i_sb, "vs-5657",
1924                    "i/o failure occurred trying to truncate %K",
1925                    &s_item_key);
1926         err = -EIO;
1927         goto out;
1928     }
1929     if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1930         reiserfs_error(inode->i_sb, "PAP-5660",
1931                    "wrong result %d of search for %K", retval,
1932                    &s_item_key);
1933 
1934         err = -EIO;
1935         goto out;
1936     }
1937 
1938     s_search_path.pos_in_item--;
1939 
1940     /* Get real file size (total length of all file items) */
1941     p_le_ih = tp_item_head(&s_search_path);
1942     if (is_statdata_le_ih(p_le_ih))
1943         file_size = 0;
1944     else {
1945         loff_t offset = le_ih_k_offset(p_le_ih);
1946         int bytes =
1947             op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1948 
1949         /*
1950          * this may mismatch with real file size: if last direct item
1951          * had no padding zeros and last unformatted node had no free
1952          * space, this file would have this file size
1953          */
1954         file_size = offset + bytes - 1;
1955     }
1956     /*
1957      * are we doing a full truncate or delete, if so
1958      * kick in the reada code
1959      */
1960     if (new_file_size == 0)
1961         s_search_path.reada = PATH_READA | PATH_READA_BACK;
1962 
1963     if (file_size == 0 || file_size < new_file_size) {
1964         goto update_and_out;
1965     }
1966 
1967     /* Update key to search for the last file item. */
1968     set_cpu_key_k_offset(&s_item_key, file_size);
1969 
1970     do {
1971         /* Cut or delete file item. */
1972         deleted =
1973             reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1974                        inode, page, new_file_size);
1975         if (deleted < 0) {
1976             reiserfs_warning(inode->i_sb, "vs-5665",
1977                      "reiserfs_cut_from_item failed");
1978             reiserfs_check_path(&s_search_path);
1979             return 0;
1980         }
1981 
1982         RFALSE(deleted > file_size,
1983                "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1984                deleted, file_size, &s_item_key);
1985 
1986         /* Change key to search the last file item. */
1987         file_size -= deleted;
1988 
1989         set_cpu_key_k_offset(&s_item_key, file_size);
1990 
1991         /*
1992          * While there are bytes to truncate and previous
1993          * file item is presented in the tree.
1994          */
1995 
1996         /*
1997          * This loop could take a really long time, and could log
1998          * many more blocks than a transaction can hold.  So, we do
1999          * a polite journal end here, and if the transaction needs
2000          * ending, we make sure the file is consistent before ending
2001          * the current trans and starting a new one
2002          */
2003         if (journal_transaction_should_end(th, 0) ||
2004             reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
2005             pathrelse(&s_search_path);
2006 
2007             if (update_timestamps) {
2008                 inode->i_mtime = current_time(inode);
2009                 inode->i_ctime = current_time(inode);
2010             }
2011             reiserfs_update_sd(th, inode);
2012 
2013             err = journal_end(th);
2014             if (err)
2015                 goto out;
2016             err = journal_begin(th, inode->i_sb,
2017                         JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
2018             if (err)
2019                 goto out;
2020             reiserfs_update_inode_transaction(inode);
2021         }
2022     } while (file_size > ROUND_UP(new_file_size) &&
2023          search_for_position_by_key(inode->i_sb, &s_item_key,
2024                         &s_search_path) == POSITION_FOUND);
2025 
2026     RFALSE(file_size > ROUND_UP(new_file_size),
2027            "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
2028            new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
2029 
2030 update_and_out:
2031     if (update_timestamps) {
2032         /* this is truncate, not file closing */
2033         inode->i_mtime = current_time(inode);
2034         inode->i_ctime = current_time(inode);
2035     }
2036     reiserfs_update_sd(th, inode);
2037 
2038 out:
2039     pathrelse(&s_search_path);
2040     return err;
2041 }
2042 
2043 #ifdef CONFIG_REISERFS_CHECK
2044 /* this makes sure, that we __append__, not overwrite or add holes */
2045 static void check_research_for_paste(struct treepath *path,
2046                      const struct cpu_key *key)
2047 {
2048     struct item_head *found_ih = tp_item_head(path);
2049 
2050     if (is_direct_le_ih(found_ih)) {
2051         if (le_ih_k_offset(found_ih) +
2052             op_bytes_number(found_ih,
2053                     get_last_bh(path)->b_size) !=
2054             cpu_key_k_offset(key)
2055             || op_bytes_number(found_ih,
2056                        get_last_bh(path)->b_size) !=
2057             pos_in_item(path))
2058             reiserfs_panic(NULL, "PAP-5720", "found direct item "
2059                        "%h or position (%d) does not match "
2060                        "to key %K", found_ih,
2061                        pos_in_item(path), key);
2062     }
2063     if (is_indirect_le_ih(found_ih)) {
2064         if (le_ih_k_offset(found_ih) +
2065             op_bytes_number(found_ih,
2066                     get_last_bh(path)->b_size) !=
2067             cpu_key_k_offset(key)
2068             || I_UNFM_NUM(found_ih) != pos_in_item(path)
2069             || get_ih_free_space(found_ih) != 0)
2070             reiserfs_panic(NULL, "PAP-5730", "found indirect "
2071                        "item (%h) or position (%d) does not "
2072                        "match to key (%K)",
2073                        found_ih, pos_in_item(path), key);
2074     }
2075 }
2076 #endif              /* config reiserfs check */
2077 
2078 /*
2079  * Paste bytes to the existing item.
2080  * Returns bytes number pasted into the item.
2081  */
2082 int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2083                  /* Path to the pasted item. */
2084                  struct treepath *search_path,
2085                  /* Key to search for the needed item. */
2086                  const struct cpu_key *key,
2087                  /* Inode item belongs to */
2088                  struct inode *inode,
2089                  /* Pointer to the bytes to paste. */
2090                  const char *body,
2091                  /* Size of pasted bytes. */
2092                  int pasted_size)
2093 {
2094     struct super_block *sb = inode->i_sb;
2095     struct tree_balance s_paste_balance;
2096     int retval;
2097     int fs_gen;
2098     int depth;
2099 
2100     BUG_ON(!th->t_trans_id);
2101 
2102     fs_gen = get_generation(inode->i_sb);
2103 
2104 #ifdef REISERQUOTA_DEBUG
2105     reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2106                "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2107                pasted_size, inode->i_uid,
2108                key2type(&key->on_disk_key));
2109 #endif
2110 
2111     depth = reiserfs_write_unlock_nested(sb);
2112     retval = dquot_alloc_space_nodirty(inode, pasted_size);
2113     reiserfs_write_lock_nested(sb, depth);
2114     if (retval) {
2115         pathrelse(search_path);
2116         return retval;
2117     }
2118     init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
2119                pasted_size);
2120 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2121     s_paste_balance.key = key->on_disk_key;
2122 #endif
2123 
2124     /* DQUOT_* can schedule, must check before the fix_nodes */
2125     if (fs_changed(fs_gen, inode->i_sb)) {
2126         goto search_again;
2127     }
2128 
2129     while ((retval =
2130         fix_nodes(M_PASTE, &s_paste_balance, NULL,
2131               body)) == REPEAT_SEARCH) {
2132 search_again:
2133         /* file system changed while we were in the fix_nodes */
2134         PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2135         retval =
2136             search_for_position_by_key(th->t_super, key,
2137                            search_path);
2138         if (retval == IO_ERROR) {
2139             retval = -EIO;
2140             goto error_out;
2141         }
2142         if (retval == POSITION_FOUND) {
2143             reiserfs_warning(inode->i_sb, "PAP-5710",
2144                      "entry or pasted byte (%K) exists",
2145                      key);
2146             retval = -EEXIST;
2147             goto error_out;
2148         }
2149 #ifdef CONFIG_REISERFS_CHECK
2150         check_research_for_paste(search_path, key);
2151 #endif
2152     }
2153 
2154     /*
2155      * Perform balancing after all resources are collected by fix_nodes,
2156      * and accessing them will not risk triggering schedule.
2157      */
2158     if (retval == CARRY_ON) {
2159         do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2160         return 0;
2161     }
2162     retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2163 error_out:
2164     /* this also releases the path */
2165     unfix_nodes(&s_paste_balance);
2166 #ifdef REISERQUOTA_DEBUG
2167     reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2168                "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2169                pasted_size, inode->i_uid,
2170                key2type(&key->on_disk_key));
2171 #endif
2172     depth = reiserfs_write_unlock_nested(sb);
2173     dquot_free_space_nodirty(inode, pasted_size);
2174     reiserfs_write_lock_nested(sb, depth);
2175     return retval;
2176 }
2177 
2178 /*
2179  * Insert new item into the buffer at the path.
2180  * th   - active transaction handle
2181  * path - path to the inserted item
2182  * ih   - pointer to the item header to insert
2183  * body - pointer to the bytes to insert
2184  */
2185 int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2186              struct treepath *path, const struct cpu_key *key,
2187              struct item_head *ih, struct inode *inode,
2188              const char *body)
2189 {
2190     struct tree_balance s_ins_balance;
2191     int retval;
2192     int fs_gen = 0;
2193     int quota_bytes = 0;
2194 
2195     BUG_ON(!th->t_trans_id);
2196 
2197     if (inode) {        /* Do we count quotas for item? */
2198         int depth;
2199         fs_gen = get_generation(inode->i_sb);
2200         quota_bytes = ih_item_len(ih);
2201 
2202         /*
2203          * hack so the quota code doesn't have to guess
2204          * if the file has a tail, links are always tails,
2205          * so there's no guessing needed
2206          */
2207         if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2208             quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2209 #ifdef REISERQUOTA_DEBUG
2210         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2211                    "reiserquota insert_item(): allocating %u id=%u type=%c",
2212                    quota_bytes, inode->i_uid, head2type(ih));
2213 #endif
2214         /*
2215          * We can't dirty inode here. It would be immediately
2216          * written but appropriate stat item isn't inserted yet...
2217          */
2218         depth = reiserfs_write_unlock_nested(inode->i_sb);
2219         retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2220         reiserfs_write_lock_nested(inode->i_sb, depth);
2221         if (retval) {
2222             pathrelse(path);
2223             return retval;
2224         }
2225     }
2226     init_tb_struct(th, &s_ins_balance, th->t_super, path,
2227                IH_SIZE + ih_item_len(ih));
2228 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2229     s_ins_balance.key = key->on_disk_key;
2230 #endif
2231     /*
2232      * DQUOT_* can schedule, must check to be sure calling
2233      * fix_nodes is safe
2234      */
2235     if (inode && fs_changed(fs_gen, inode->i_sb)) {
2236         goto search_again;
2237     }
2238 
2239     while ((retval =
2240         fix_nodes(M_INSERT, &s_ins_balance, ih,
2241               body)) == REPEAT_SEARCH) {
2242 search_again:
2243         /* file system changed while we were in the fix_nodes */
2244         PROC_INFO_INC(th->t_super, insert_item_restarted);
2245         retval = search_item(th->t_super, key, path);
2246         if (retval == IO_ERROR) {
2247             retval = -EIO;
2248             goto error_out;
2249         }
2250         if (retval == ITEM_FOUND) {
2251             reiserfs_warning(th->t_super, "PAP-5760",
2252                      "key %K already exists in the tree",
2253                      key);
2254             retval = -EEXIST;
2255             goto error_out;
2256         }
2257     }
2258 
2259     /* make balancing after all resources will be collected at a time */
2260     if (retval == CARRY_ON) {
2261         do_balance(&s_ins_balance, ih, body, M_INSERT);
2262         return 0;
2263     }
2264 
2265     retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2266 error_out:
2267     /* also releases the path */
2268     unfix_nodes(&s_ins_balance);
2269 #ifdef REISERQUOTA_DEBUG
2270     if (inode)
2271         reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2272                "reiserquota insert_item(): freeing %u id=%u type=%c",
2273                quota_bytes, inode->i_uid, head2type(ih));
2274 #endif
2275     if (inode) {
2276         int depth = reiserfs_write_unlock_nested(inode->i_sb);
2277         dquot_free_space_nodirty(inode, quota_bytes);
2278         reiserfs_write_lock_nested(inode->i_sb, depth);
2279     }
2280     return retval;
2281 }