Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * lcnalloc.c - Cluster (de)allocation code.  Part of the Linux-NTFS project.
0004  *
0005  * Copyright (c) 2004-2005 Anton Altaparmakov
0006  */
0007 
0008 #ifdef NTFS_RW
0009 
0010 #include <linux/pagemap.h>
0011 
0012 #include "lcnalloc.h"
0013 #include "debug.h"
0014 #include "bitmap.h"
0015 #include "inode.h"
0016 #include "volume.h"
0017 #include "attrib.h"
0018 #include "malloc.h"
0019 #include "aops.h"
0020 #include "ntfs.h"
0021 
0022 /**
0023  * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
0024  * @vol:    mounted ntfs volume on which to free the clusters
0025  * @rl:     runlist describing the clusters to free
0026  *
0027  * Free all the clusters described by the runlist @rl on the volume @vol.  In
0028  * the case of an error being returned, at least some of the clusters were not
0029  * freed.
0030  *
0031  * Return 0 on success and -errno on error.
0032  *
0033  * Locking: - The volume lcn bitmap must be locked for writing on entry and is
0034  *        left locked on return.
0035  */
0036 int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
0037         const runlist_element *rl)
0038 {
0039     struct inode *lcnbmp_vi = vol->lcnbmp_ino;
0040     int ret = 0;
0041 
0042     ntfs_debug("Entering.");
0043     if (!rl)
0044         return 0;
0045     for (; rl->length; rl++) {
0046         int err;
0047 
0048         if (rl->lcn < 0)
0049             continue;
0050         err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
0051         if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
0052             ret = err;
0053     }
0054     ntfs_debug("Done.");
0055     return ret;
0056 }
0057 
0058 /**
0059  * ntfs_cluster_alloc - allocate clusters on an ntfs volume
0060  * @vol:    mounted ntfs volume on which to allocate the clusters
0061  * @start_vcn:  vcn to use for the first allocated cluster
0062  * @count:  number of clusters to allocate
0063  * @start_lcn:  starting lcn at which to allocate the clusters (or -1 if none)
0064  * @zone:   zone from which to allocate the clusters
0065  * @is_extension:   if 'true', this is an attribute extension
0066  *
0067  * Allocate @count clusters preferably starting at cluster @start_lcn or at the
0068  * current allocator position if @start_lcn is -1, on the mounted ntfs volume
0069  * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
0070  * MFT_ZONE for allocation of clusters for the master file table, i.e. the
0071  * $MFT/$DATA attribute.
0072  *
0073  * @start_vcn specifies the vcn of the first allocated cluster.  This makes
0074  * merging the resulting runlist with the old runlist easier.
0075  *
0076  * If @is_extension is 'true', the caller is allocating clusters to extend an
0077  * attribute and if it is 'false', the caller is allocating clusters to fill a
0078  * hole in an attribute.  Practically the difference is that if @is_extension
0079  * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
0080  * @is_extension is 'false' the runlist will be terminated with
0081  * LCN_RL_NOT_MAPPED.
0082  *
0083  * You need to check the return value with IS_ERR().  If this is false, the
0084  * function was successful and the return value is a runlist describing the
0085  * allocated cluster(s).  If IS_ERR() is true, the function failed and
0086  * PTR_ERR() gives you the error code.
0087  *
0088  * Notes on the allocation algorithm
0089  * =================================
0090  *
0091  * There are two data zones.  First is the area between the end of the mft zone
0092  * and the end of the volume, and second is the area between the start of the
0093  * volume and the start of the mft zone.  On unmodified/standard NTFS 1.x
0094  * volumes, the second data zone does not exist due to the mft zone being
0095  * expanded to cover the start of the volume in order to reserve space for the
0096  * mft bitmap attribute.
0097  *
0098  * This is not the prettiest function but the complexity stems from the need of
0099  * implementing the mft vs data zoned approach and from the fact that we have
0100  * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
0101  * need to cope with crossing over boundaries of two buffers.  Further, the
0102  * fact that the allocator allows for caller supplied hints as to the location
0103  * of where allocation should begin and the fact that the allocator keeps track
0104  * of where in the data zones the next natural allocation should occur,
0105  * contribute to the complexity of the function.  But it should all be
0106  * worthwhile, because this allocator should: 1) be a full implementation of
0107  * the MFT zone approach used by Windows NT, 2) cause reduction in
0108  * fragmentation, and 3) be speedy in allocations (the code is not optimized
0109  * for speed, but the algorithm is, so further speed improvements are probably
0110  * possible).
0111  *
0112  * FIXME: We should be monitoring cluster allocation and increment the MFT zone
0113  * size dynamically but this is something for the future.  We will just cause
0114  * heavier fragmentation by not doing it and I am not even sure Windows would
0115  * grow the MFT zone dynamically, so it might even be correct not to do this.
0116  * The overhead in doing dynamic MFT zone expansion would be very large and
0117  * unlikely worth the effort. (AIA)
0118  *
0119  * TODO: I have added in double the required zone position pointer wrap around
0120  * logic which can be optimized to having only one of the two logic sets.
0121  * However, having the double logic will work fine, but if we have only one of
0122  * the sets and we get it wrong somewhere, then we get into trouble, so
0123  * removing the duplicate logic requires _very_ careful consideration of _all_
0124  * possible code paths.  So at least for now, I am leaving the double logic -
0125  * better safe than sorry... (AIA)
0126  *
0127  * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked
0128  *        on return.
0129  *      - This function takes the volume lcn bitmap lock for writing and
0130  *        modifies the bitmap contents.
0131  */
0132 runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
0133         const s64 count, const LCN start_lcn,
0134         const NTFS_CLUSTER_ALLOCATION_ZONES zone,
0135         const bool is_extension)
0136 {
0137     LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
0138     LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
0139     s64 clusters;
0140     loff_t i_size;
0141     struct inode *lcnbmp_vi;
0142     runlist_element *rl = NULL;
0143     struct address_space *mapping;
0144     struct page *page = NULL;
0145     u8 *buf, *byte;
0146     int err = 0, rlpos, rlsize, buf_size;
0147     u8 pass, done_zones, search_zone, need_writeback = 0, bit;
0148 
0149     ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn "
0150             "0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn,
0151             (unsigned long long)count,
0152             (unsigned long long)start_lcn,
0153             zone == MFT_ZONE ? "MFT" : "DATA");
0154     BUG_ON(!vol);
0155     lcnbmp_vi = vol->lcnbmp_ino;
0156     BUG_ON(!lcnbmp_vi);
0157     BUG_ON(start_vcn < 0);
0158     BUG_ON(count < 0);
0159     BUG_ON(start_lcn < -1);
0160     BUG_ON(zone < FIRST_ZONE);
0161     BUG_ON(zone > LAST_ZONE);
0162 
0163     /* Return NULL if @count is zero. */
0164     if (!count)
0165         return NULL;
0166     /* Take the lcnbmp lock for writing. */
0167     down_write(&vol->lcnbmp_lock);
0168     /*
0169      * If no specific @start_lcn was requested, use the current data zone
0170      * position, otherwise use the requested @start_lcn but make sure it
0171      * lies outside the mft zone.  Also set done_zones to 0 (no zones done)
0172      * and pass depending on whether we are starting inside a zone (1) or
0173      * at the beginning of a zone (2).  If requesting from the MFT_ZONE,
0174      * we either start at the current position within the mft zone or at
0175      * the specified position.  If the latter is out of bounds then we start
0176      * at the beginning of the MFT_ZONE.
0177      */
0178     done_zones = 0;
0179     pass = 1;
0180     /*
0181      * zone_start and zone_end are the current search range.  search_zone
0182      * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
0183      * volume) and 4 for data zone 2 (start of volume till start of mft
0184      * zone).
0185      */
0186     zone_start = start_lcn;
0187     if (zone_start < 0) {
0188         if (zone == DATA_ZONE)
0189             zone_start = vol->data1_zone_pos;
0190         else
0191             zone_start = vol->mft_zone_pos;
0192         if (!zone_start) {
0193             /*
0194              * Zone starts at beginning of volume which means a
0195              * single pass is sufficient.
0196              */
0197             pass = 2;
0198         }
0199     } else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start &&
0200             zone_start < vol->mft_zone_end) {
0201         zone_start = vol->mft_zone_end;
0202         /*
0203          * Starting at beginning of data1_zone which means a single
0204          * pass in this zone is sufficient.
0205          */
0206         pass = 2;
0207     } else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start ||
0208             zone_start >= vol->mft_zone_end)) {
0209         zone_start = vol->mft_lcn;
0210         if (!vol->mft_zone_end)
0211             zone_start = 0;
0212         /*
0213          * Starting at beginning of volume which means a single pass
0214          * is sufficient.
0215          */
0216         pass = 2;
0217     }
0218     if (zone == MFT_ZONE) {
0219         zone_end = vol->mft_zone_end;
0220         search_zone = 1;
0221     } else /* if (zone == DATA_ZONE) */ {
0222         /* Skip searching the mft zone. */
0223         done_zones |= 1;
0224         if (zone_start >= vol->mft_zone_end) {
0225             zone_end = vol->nr_clusters;
0226             search_zone = 2;
0227         } else {
0228             zone_end = vol->mft_zone_start;
0229             search_zone = 4;
0230         }
0231     }
0232     /*
0233      * bmp_pos is the current bit position inside the bitmap.  We use
0234      * bmp_initial_pos to determine whether or not to do a zone switch.
0235      */
0236     bmp_pos = bmp_initial_pos = zone_start;
0237 
0238     /* Loop until all clusters are allocated, i.e. clusters == 0. */
0239     clusters = count;
0240     rlpos = rlsize = 0;
0241     mapping = lcnbmp_vi->i_mapping;
0242     i_size = i_size_read(lcnbmp_vi);
0243     while (1) {
0244         ntfs_debug("Start of outer while loop: done_zones 0x%x, "
0245                 "search_zone %i, pass %i, zone_start 0x%llx, "
0246                 "zone_end 0x%llx, bmp_initial_pos 0x%llx, "
0247                 "bmp_pos 0x%llx, rlpos %i, rlsize %i.",
0248                 done_zones, search_zone, pass,
0249                 (unsigned long long)zone_start,
0250                 (unsigned long long)zone_end,
0251                 (unsigned long long)bmp_initial_pos,
0252                 (unsigned long long)bmp_pos, rlpos, rlsize);
0253         /* Loop until we run out of free clusters. */
0254         last_read_pos = bmp_pos >> 3;
0255         ntfs_debug("last_read_pos 0x%llx.",
0256                 (unsigned long long)last_read_pos);
0257         if (last_read_pos > i_size) {
0258             ntfs_debug("End of attribute reached.  "
0259                     "Skipping to zone_pass_done.");
0260             goto zone_pass_done;
0261         }
0262         if (likely(page)) {
0263             if (need_writeback) {
0264                 ntfs_debug("Marking page dirty.");
0265                 flush_dcache_page(page);
0266                 set_page_dirty(page);
0267                 need_writeback = 0;
0268             }
0269             ntfs_unmap_page(page);
0270         }
0271         page = ntfs_map_page(mapping, last_read_pos >>
0272                 PAGE_SHIFT);
0273         if (IS_ERR(page)) {
0274             err = PTR_ERR(page);
0275             ntfs_error(vol->sb, "Failed to map page.");
0276             goto out;
0277         }
0278         buf_size = last_read_pos & ~PAGE_MASK;
0279         buf = page_address(page) + buf_size;
0280         buf_size = PAGE_SIZE - buf_size;
0281         if (unlikely(last_read_pos + buf_size > i_size))
0282             buf_size = i_size - last_read_pos;
0283         buf_size <<= 3;
0284         lcn = bmp_pos & 7;
0285         bmp_pos &= ~(LCN)7;
0286         ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
0287                 "bmp_pos 0x%llx, need_writeback %i.", buf_size,
0288                 (unsigned long long)lcn,
0289                 (unsigned long long)bmp_pos, need_writeback);
0290         while (lcn < buf_size && lcn + bmp_pos < zone_end) {
0291             byte = buf + (lcn >> 3);
0292             ntfs_debug("In inner while loop: buf_size %i, "
0293                     "lcn 0x%llx, bmp_pos 0x%llx, "
0294                     "need_writeback %i, byte ofs 0x%x, "
0295                     "*byte 0x%x.", buf_size,
0296                     (unsigned long long)lcn,
0297                     (unsigned long long)bmp_pos,
0298                     need_writeback,
0299                     (unsigned int)(lcn >> 3),
0300                     (unsigned int)*byte);
0301             /* Skip full bytes. */
0302             if (*byte == 0xff) {
0303                 lcn = (lcn + 8) & ~(LCN)7;
0304                 ntfs_debug("Continuing while loop 1.");
0305                 continue;
0306             }
0307             bit = 1 << (lcn & 7);
0308             ntfs_debug("bit 0x%x.", bit);
0309             /* If the bit is already set, go onto the next one. */
0310             if (*byte & bit) {
0311                 lcn++;
0312                 ntfs_debug("Continuing while loop 2.");
0313                 continue;
0314             }
0315             /*
0316              * Allocate more memory if needed, including space for
0317              * the terminator element.
0318              * ntfs_malloc_nofs() operates on whole pages only.
0319              */
0320             if ((rlpos + 2) * sizeof(*rl) > rlsize) {
0321                 runlist_element *rl2;
0322 
0323                 ntfs_debug("Reallocating memory.");
0324                 if (!rl)
0325                     ntfs_debug("First free bit is at LCN "
0326                             "0x%llx.",
0327                             (unsigned long long)
0328                             (lcn + bmp_pos));
0329                 rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
0330                 if (unlikely(!rl2)) {
0331                     err = -ENOMEM;
0332                     ntfs_error(vol->sb, "Failed to "
0333                             "allocate memory.");
0334                     goto out;
0335                 }
0336                 memcpy(rl2, rl, rlsize);
0337                 ntfs_free(rl);
0338                 rl = rl2;
0339                 rlsize += PAGE_SIZE;
0340                 ntfs_debug("Reallocated memory, rlsize 0x%x.",
0341                         rlsize);
0342             }
0343             /* Allocate the bitmap bit. */
0344             *byte |= bit;
0345             /* We need to write this bitmap page to disk. */
0346             need_writeback = 1;
0347             ntfs_debug("*byte 0x%x, need_writeback is set.",
0348                     (unsigned int)*byte);
0349             /*
0350              * Coalesce with previous run if adjacent LCNs.
0351              * Otherwise, append a new run.
0352              */
0353             ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), "
0354                     "prev_lcn 0x%llx, lcn 0x%llx, "
0355                     "bmp_pos 0x%llx, prev_run_len 0x%llx, "
0356                     "rlpos %i.",
0357                     (unsigned long long)(lcn + bmp_pos),
0358                     1ULL, (unsigned long long)prev_lcn,
0359                     (unsigned long long)lcn,
0360                     (unsigned long long)bmp_pos,
0361                     (unsigned long long)prev_run_len,
0362                     rlpos);
0363             if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
0364                 ntfs_debug("Coalescing to run (lcn 0x%llx, "
0365                         "len 0x%llx).",
0366                         (unsigned long long)
0367                         rl[rlpos - 1].lcn,
0368                         (unsigned long long)
0369                         rl[rlpos - 1].length);
0370                 rl[rlpos - 1].length = ++prev_run_len;
0371                 ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), "
0372                         "prev_run_len 0x%llx.",
0373                         (unsigned long long)
0374                         rl[rlpos - 1].lcn,
0375                         (unsigned long long)
0376                         rl[rlpos - 1].length,
0377                         (unsigned long long)
0378                         prev_run_len);
0379             } else {
0380                 if (likely(rlpos)) {
0381                     ntfs_debug("Adding new run, (previous "
0382                             "run lcn 0x%llx, "
0383                             "len 0x%llx).",
0384                             (unsigned long long)
0385                             rl[rlpos - 1].lcn,
0386                             (unsigned long long)
0387                             rl[rlpos - 1].length);
0388                     rl[rlpos].vcn = rl[rlpos - 1].vcn +
0389                             prev_run_len;
0390                 } else {
0391                     ntfs_debug("Adding new run, is first "
0392                             "run.");
0393                     rl[rlpos].vcn = start_vcn;
0394                 }
0395                 rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
0396                 rl[rlpos].length = prev_run_len = 1;
0397                 rlpos++;
0398             }
0399             /* Done? */
0400             if (!--clusters) {
0401                 LCN tc;
0402                 /*
0403                  * Update the current zone position.  Positions
0404                  * of already scanned zones have been updated
0405                  * during the respective zone switches.
0406                  */
0407                 tc = lcn + bmp_pos + 1;
0408                 ntfs_debug("Done. Updating current zone "
0409                         "position, tc 0x%llx, "
0410                         "search_zone %i.",
0411                         (unsigned long long)tc,
0412                         search_zone);
0413                 switch (search_zone) {
0414                 case 1:
0415                     ntfs_debug("Before checks, "
0416                             "vol->mft_zone_pos "
0417                             "0x%llx.",
0418                             (unsigned long long)
0419                             vol->mft_zone_pos);
0420                     if (tc >= vol->mft_zone_end) {
0421                         vol->mft_zone_pos =
0422                                 vol->mft_lcn;
0423                         if (!vol->mft_zone_end)
0424                             vol->mft_zone_pos = 0;
0425                     } else if ((bmp_initial_pos >=
0426                             vol->mft_zone_pos ||
0427                             tc > vol->mft_zone_pos)
0428                             && tc >= vol->mft_lcn)
0429                         vol->mft_zone_pos = tc;
0430                     ntfs_debug("After checks, "
0431                             "vol->mft_zone_pos "
0432                             "0x%llx.",
0433                             (unsigned long long)
0434                             vol->mft_zone_pos);
0435                     break;
0436                 case 2:
0437                     ntfs_debug("Before checks, "
0438                             "vol->data1_zone_pos "
0439                             "0x%llx.",
0440                             (unsigned long long)
0441                             vol->data1_zone_pos);
0442                     if (tc >= vol->nr_clusters)
0443                         vol->data1_zone_pos =
0444                                  vol->mft_zone_end;
0445                     else if ((bmp_initial_pos >=
0446                             vol->data1_zone_pos ||
0447                             tc > vol->data1_zone_pos)
0448                             && tc >= vol->mft_zone_end)
0449                         vol->data1_zone_pos = tc;
0450                     ntfs_debug("After checks, "
0451                             "vol->data1_zone_pos "
0452                             "0x%llx.",
0453                             (unsigned long long)
0454                             vol->data1_zone_pos);
0455                     break;
0456                 case 4:
0457                     ntfs_debug("Before checks, "
0458                             "vol->data2_zone_pos "
0459                             "0x%llx.",
0460                             (unsigned long long)
0461                             vol->data2_zone_pos);
0462                     if (tc >= vol->mft_zone_start)
0463                         vol->data2_zone_pos = 0;
0464                     else if (bmp_initial_pos >=
0465                               vol->data2_zone_pos ||
0466                               tc > vol->data2_zone_pos)
0467                         vol->data2_zone_pos = tc;
0468                     ntfs_debug("After checks, "
0469                             "vol->data2_zone_pos "
0470                             "0x%llx.",
0471                             (unsigned long long)
0472                             vol->data2_zone_pos);
0473                     break;
0474                 default:
0475                     BUG();
0476                 }
0477                 ntfs_debug("Finished.  Going to out.");
0478                 goto out;
0479             }
0480             lcn++;
0481         }
0482         bmp_pos += buf_size;
0483         ntfs_debug("After inner while loop: buf_size 0x%x, lcn "
0484                 "0x%llx, bmp_pos 0x%llx, need_writeback %i.",
0485                 buf_size, (unsigned long long)lcn,
0486                 (unsigned long long)bmp_pos, need_writeback);
0487         if (bmp_pos < zone_end) {
0488             ntfs_debug("Continuing outer while loop, "
0489                     "bmp_pos 0x%llx, zone_end 0x%llx.",
0490                     (unsigned long long)bmp_pos,
0491                     (unsigned long long)zone_end);
0492             continue;
0493         }
0494 zone_pass_done: /* Finished with the current zone pass. */
0495         ntfs_debug("At zone_pass_done, pass %i.", pass);
0496         if (pass == 1) {
0497             /*
0498              * Now do pass 2, scanning the first part of the zone
0499              * we omitted in pass 1.
0500              */
0501             pass = 2;
0502             zone_end = zone_start;
0503             switch (search_zone) {
0504             case 1: /* mft_zone */
0505                 zone_start = vol->mft_zone_start;
0506                 break;
0507             case 2: /* data1_zone */
0508                 zone_start = vol->mft_zone_end;
0509                 break;
0510             case 4: /* data2_zone */
0511                 zone_start = 0;
0512                 break;
0513             default:
0514                 BUG();
0515             }
0516             /* Sanity check. */
0517             if (zone_end < zone_start)
0518                 zone_end = zone_start;
0519             bmp_pos = zone_start;
0520             ntfs_debug("Continuing outer while loop, pass 2, "
0521                     "zone_start 0x%llx, zone_end 0x%llx, "
0522                     "bmp_pos 0x%llx.",
0523                     (unsigned long long)zone_start,
0524                     (unsigned long long)zone_end,
0525                     (unsigned long long)bmp_pos);
0526             continue;
0527         } /* pass == 2 */
0528 done_zones_check:
0529         ntfs_debug("At done_zones_check, search_zone %i, done_zones "
0530                 "before 0x%x, done_zones after 0x%x.",
0531                 search_zone, done_zones,
0532                 done_zones | search_zone);
0533         done_zones |= search_zone;
0534         if (done_zones < 7) {
0535             ntfs_debug("Switching zone.");
0536             /* Now switch to the next zone we haven't done yet. */
0537             pass = 1;
0538             switch (search_zone) {
0539             case 1:
0540                 ntfs_debug("Switching from mft zone to data1 "
0541                         "zone.");
0542                 /* Update mft zone position. */
0543                 if (rlpos) {
0544                     LCN tc;
0545 
0546                     ntfs_debug("Before checks, "
0547                             "vol->mft_zone_pos "
0548                             "0x%llx.",
0549                             (unsigned long long)
0550                             vol->mft_zone_pos);
0551                     tc = rl[rlpos - 1].lcn +
0552                             rl[rlpos - 1].length;
0553                     if (tc >= vol->mft_zone_end) {
0554                         vol->mft_zone_pos =
0555                                 vol->mft_lcn;
0556                         if (!vol->mft_zone_end)
0557                             vol->mft_zone_pos = 0;
0558                     } else if ((bmp_initial_pos >=
0559                             vol->mft_zone_pos ||
0560                             tc > vol->mft_zone_pos)
0561                             && tc >= vol->mft_lcn)
0562                         vol->mft_zone_pos = tc;
0563                     ntfs_debug("After checks, "
0564                             "vol->mft_zone_pos "
0565                             "0x%llx.",
0566                             (unsigned long long)
0567                             vol->mft_zone_pos);
0568                 }
0569                 /* Switch from mft zone to data1 zone. */
0570 switch_to_data1_zone:       search_zone = 2;
0571                 zone_start = bmp_initial_pos =
0572                         vol->data1_zone_pos;
0573                 zone_end = vol->nr_clusters;
0574                 if (zone_start == vol->mft_zone_end)
0575                     pass = 2;
0576                 if (zone_start >= zone_end) {
0577                     vol->data1_zone_pos = zone_start =
0578                             vol->mft_zone_end;
0579                     pass = 2;
0580                 }
0581                 break;
0582             case 2:
0583                 ntfs_debug("Switching from data1 zone to "
0584                         "data2 zone.");
0585                 /* Update data1 zone position. */
0586                 if (rlpos) {
0587                     LCN tc;
0588 
0589                     ntfs_debug("Before checks, "
0590                             "vol->data1_zone_pos "
0591                             "0x%llx.",
0592                             (unsigned long long)
0593                             vol->data1_zone_pos);
0594                     tc = rl[rlpos - 1].lcn +
0595                             rl[rlpos - 1].length;
0596                     if (tc >= vol->nr_clusters)
0597                         vol->data1_zone_pos =
0598                                  vol->mft_zone_end;
0599                     else if ((bmp_initial_pos >=
0600                             vol->data1_zone_pos ||
0601                             tc > vol->data1_zone_pos)
0602                             && tc >= vol->mft_zone_end)
0603                         vol->data1_zone_pos = tc;
0604                     ntfs_debug("After checks, "
0605                             "vol->data1_zone_pos "
0606                             "0x%llx.",
0607                             (unsigned long long)
0608                             vol->data1_zone_pos);
0609                 }
0610                 /* Switch from data1 zone to data2 zone. */
0611                 search_zone = 4;
0612                 zone_start = bmp_initial_pos =
0613                         vol->data2_zone_pos;
0614                 zone_end = vol->mft_zone_start;
0615                 if (!zone_start)
0616                     pass = 2;
0617                 if (zone_start >= zone_end) {
0618                     vol->data2_zone_pos = zone_start =
0619                             bmp_initial_pos = 0;
0620                     pass = 2;
0621                 }
0622                 break;
0623             case 4:
0624                 ntfs_debug("Switching from data2 zone to "
0625                         "data1 zone.");
0626                 /* Update data2 zone position. */
0627                 if (rlpos) {
0628                     LCN tc;
0629 
0630                     ntfs_debug("Before checks, "
0631                             "vol->data2_zone_pos "
0632                             "0x%llx.",
0633                             (unsigned long long)
0634                             vol->data2_zone_pos);
0635                     tc = rl[rlpos - 1].lcn +
0636                             rl[rlpos - 1].length;
0637                     if (tc >= vol->mft_zone_start)
0638                         vol->data2_zone_pos = 0;
0639                     else if (bmp_initial_pos >=
0640                               vol->data2_zone_pos ||
0641                               tc > vol->data2_zone_pos)
0642                         vol->data2_zone_pos = tc;
0643                     ntfs_debug("After checks, "
0644                             "vol->data2_zone_pos "
0645                             "0x%llx.",
0646                             (unsigned long long)
0647                             vol->data2_zone_pos);
0648                 }
0649                 /* Switch from data2 zone to data1 zone. */
0650                 goto switch_to_data1_zone;
0651             default:
0652                 BUG();
0653             }
0654             ntfs_debug("After zone switch, search_zone %i, "
0655                     "pass %i, bmp_initial_pos 0x%llx, "
0656                     "zone_start 0x%llx, zone_end 0x%llx.",
0657                     search_zone, pass,
0658                     (unsigned long long)bmp_initial_pos,
0659                     (unsigned long long)zone_start,
0660                     (unsigned long long)zone_end);
0661             bmp_pos = zone_start;
0662             if (zone_start == zone_end) {
0663                 ntfs_debug("Empty zone, going to "
0664                         "done_zones_check.");
0665                 /* Empty zone. Don't bother searching it. */
0666                 goto done_zones_check;
0667             }
0668             ntfs_debug("Continuing outer while loop.");
0669             continue;
0670         } /* done_zones == 7 */
0671         ntfs_debug("All zones are finished.");
0672         /*
0673          * All zones are finished!  If DATA_ZONE, shrink mft zone.  If
0674          * MFT_ZONE, we have really run out of space.
0675          */
0676         mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
0677         ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end "
0678                 "0x%llx, mft_zone_size 0x%llx.",
0679                 (unsigned long long)vol->mft_zone_start,
0680                 (unsigned long long)vol->mft_zone_end,
0681                 (unsigned long long)mft_zone_size);
0682         if (zone == MFT_ZONE || mft_zone_size <= 0) {
0683             ntfs_debug("No free clusters left, going to out.");
0684             /* Really no more space left on device. */
0685             err = -ENOSPC;
0686             goto out;
0687         } /* zone == DATA_ZONE && mft_zone_size > 0 */
0688         ntfs_debug("Shrinking mft zone.");
0689         zone_end = vol->mft_zone_end;
0690         mft_zone_size >>= 1;
0691         if (mft_zone_size > 0)
0692             vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
0693         else /* mft zone and data2 zone no longer exist. */
0694             vol->data2_zone_pos = vol->mft_zone_start =
0695                     vol->mft_zone_end = 0;
0696         if (vol->mft_zone_pos >= vol->mft_zone_end) {
0697             vol->mft_zone_pos = vol->mft_lcn;
0698             if (!vol->mft_zone_end)
0699                 vol->mft_zone_pos = 0;
0700         }
0701         bmp_pos = zone_start = bmp_initial_pos =
0702                 vol->data1_zone_pos = vol->mft_zone_end;
0703         search_zone = 2;
0704         pass = 2;
0705         done_zones &= ~2;
0706         ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, "
0707                 "vol->mft_zone_start 0x%llx, "
0708                 "vol->mft_zone_end 0x%llx, "
0709                 "vol->mft_zone_pos 0x%llx, search_zone 2, "
0710                 "pass 2, dones_zones 0x%x, zone_start 0x%llx, "
0711                 "zone_end 0x%llx, vol->data1_zone_pos 0x%llx, "
0712                 "continuing outer while loop.",
0713                 (unsigned long long)mft_zone_size,
0714                 (unsigned long long)vol->mft_zone_start,
0715                 (unsigned long long)vol->mft_zone_end,
0716                 (unsigned long long)vol->mft_zone_pos,
0717                 done_zones, (unsigned long long)zone_start,
0718                 (unsigned long long)zone_end,
0719                 (unsigned long long)vol->data1_zone_pos);
0720     }
0721     ntfs_debug("After outer while loop.");
0722 out:
0723     ntfs_debug("At out.");
0724     /* Add runlist terminator element. */
0725     if (likely(rl)) {
0726         rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
0727         rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
0728         rl[rlpos].length = 0;
0729     }
0730     if (likely(page && !IS_ERR(page))) {
0731         if (need_writeback) {
0732             ntfs_debug("Marking page dirty.");
0733             flush_dcache_page(page);
0734             set_page_dirty(page);
0735             need_writeback = 0;
0736         }
0737         ntfs_unmap_page(page);
0738     }
0739     if (likely(!err)) {
0740         up_write(&vol->lcnbmp_lock);
0741         ntfs_debug("Done.");
0742         return rl;
0743     }
0744     ntfs_error(vol->sb, "Failed to allocate clusters, aborting "
0745             "(error %i).", err);
0746     if (rl) {
0747         int err2;
0748 
0749         if (err == -ENOSPC)
0750             ntfs_debug("Not enough space to complete allocation, "
0751                     "err -ENOSPC, first free lcn 0x%llx, "
0752                     "could allocate up to 0x%llx "
0753                     "clusters.",
0754                     (unsigned long long)rl[0].lcn,
0755                     (unsigned long long)(count - clusters));
0756         /* Deallocate all allocated clusters. */
0757         ntfs_debug("Attempting rollback...");
0758         err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
0759         if (err2) {
0760             ntfs_error(vol->sb, "Failed to rollback (error %i).  "
0761                     "Leaving inconsistent metadata!  "
0762                     "Unmount and run chkdsk.", err2);
0763             NVolSetErrors(vol);
0764         }
0765         /* Free the runlist. */
0766         ntfs_free(rl);
0767     } else if (err == -ENOSPC)
0768         ntfs_debug("No space left at all, err = -ENOSPC, first free "
0769                 "lcn = 0x%llx.",
0770                 (long long)vol->data1_zone_pos);
0771     up_write(&vol->lcnbmp_lock);
0772     return ERR_PTR(err);
0773 }
0774 
0775 /**
0776  * __ntfs_cluster_free - free clusters on an ntfs volume
0777  * @ni:     ntfs inode whose runlist describes the clusters to free
0778  * @start_vcn:  vcn in the runlist of @ni at which to start freeing clusters
0779  * @count:  number of clusters to free or -1 for all clusters
0780  * @ctx:    active attribute search context if present or NULL if not
0781  * @is_rollback:    true if this is a rollback operation
0782  *
0783  * Free @count clusters starting at the cluster @start_vcn in the runlist
0784  * described by the vfs inode @ni.
0785  *
0786  * If @count is -1, all clusters from @start_vcn to the end of the runlist are
0787  * deallocated.  Thus, to completely free all clusters in a runlist, use
0788  * @start_vcn = 0 and @count = -1.
0789  *
0790  * If @ctx is specified, it is an active search context of @ni and its base mft
0791  * record.  This is needed when __ntfs_cluster_free() encounters unmapped
0792  * runlist fragments and allows their mapping.  If you do not have the mft
0793  * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
0794  * perform the necessary mapping and unmapping.
0795  *
0796  * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
0797  * before returning.  Thus, @ctx will be left pointing to the same attribute on
0798  * return as on entry.  However, the actual pointers in @ctx may point to
0799  * different memory locations on return, so you must remember to reset any
0800  * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
0801  * you will probably want to do:
0802  *  m = ctx->mrec;
0803  *  a = ctx->attr;
0804  * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
0805  * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
0806  *
0807  * @is_rollback should always be 'false', it is for internal use to rollback
0808  * errors.  You probably want to use ntfs_cluster_free() instead.
0809  *
0810  * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
0811  * remove from the runlist or mark sparse the freed runs later.
0812  *
0813  * Return the number of deallocated clusters (not counting sparse ones) on
0814  * success and -errno on error.
0815  *
0816  * WARNING: If @ctx is supplied, regardless of whether success or failure is
0817  *      returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
0818  *      is no longer valid, i.e. you need to either call
0819  *      ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
0820  *      In that case PTR_ERR(@ctx->mrec) will give you the error code for
0821  *      why the mapping of the old inode failed.
0822  *
0823  * Locking: - The runlist described by @ni must be locked for writing on entry
0824  *        and is locked on return.  Note the runlist may be modified when
0825  *        needed runlist fragments need to be mapped.
0826  *      - The volume lcn bitmap must be unlocked on entry and is unlocked
0827  *        on return.
0828  *      - This function takes the volume lcn bitmap lock for writing and
0829  *        modifies the bitmap contents.
0830  *      - If @ctx is NULL, the base mft record of @ni must not be mapped on
0831  *        entry and it will be left unmapped on return.
0832  *      - If @ctx is not NULL, the base mft record must be mapped on entry
0833  *        and it will be left mapped on return.
0834  */
0835 s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
0836         ntfs_attr_search_ctx *ctx, const bool is_rollback)
0837 {
0838     s64 delta, to_free, total_freed, real_freed;
0839     ntfs_volume *vol;
0840     struct inode *lcnbmp_vi;
0841     runlist_element *rl;
0842     int err;
0843 
0844     BUG_ON(!ni);
0845     ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
0846             "0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
0847             (unsigned long long)count,
0848             is_rollback ? " (rollback)" : "");
0849     vol = ni->vol;
0850     lcnbmp_vi = vol->lcnbmp_ino;
0851     BUG_ON(!lcnbmp_vi);
0852     BUG_ON(start_vcn < 0);
0853     BUG_ON(count < -1);
0854     /*
0855      * Lock the lcn bitmap for writing but only if not rolling back.  We
0856      * must hold the lock all the way including through rollback otherwise
0857      * rollback is not possible because once we have cleared a bit and
0858      * dropped the lock, anyone could have set the bit again, thus
0859      * allocating the cluster for another use.
0860      */
0861     if (likely(!is_rollback))
0862         down_write(&vol->lcnbmp_lock);
0863 
0864     total_freed = real_freed = 0;
0865 
0866     rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
0867     if (IS_ERR(rl)) {
0868         if (!is_rollback)
0869             ntfs_error(vol->sb, "Failed to find first runlist "
0870                     "element (error %li), aborting.",
0871                     PTR_ERR(rl));
0872         err = PTR_ERR(rl);
0873         goto err_out;
0874     }
0875     if (unlikely(rl->lcn < LCN_HOLE)) {
0876         if (!is_rollback)
0877             ntfs_error(vol->sb, "First runlist element has "
0878                     "invalid lcn, aborting.");
0879         err = -EIO;
0880         goto err_out;
0881     }
0882     /* Find the starting cluster inside the run that needs freeing. */
0883     delta = start_vcn - rl->vcn;
0884 
0885     /* The number of clusters in this run that need freeing. */
0886     to_free = rl->length - delta;
0887     if (count >= 0 && to_free > count)
0888         to_free = count;
0889 
0890     if (likely(rl->lcn >= 0)) {
0891         /* Do the actual freeing of the clusters in this run. */
0892         err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta,
0893                 to_free, likely(!is_rollback) ? 0 : 1);
0894         if (unlikely(err)) {
0895             if (!is_rollback)
0896                 ntfs_error(vol->sb, "Failed to clear first run "
0897                         "(error %i), aborting.", err);
0898             goto err_out;
0899         }
0900         /* We have freed @to_free real clusters. */
0901         real_freed = to_free;
0902     };
0903     /* Go to the next run and adjust the number of clusters left to free. */
0904     ++rl;
0905     if (count >= 0)
0906         count -= to_free;
0907 
0908     /* Keep track of the total "freed" clusters, including sparse ones. */
0909     total_freed = to_free;
0910     /*
0911      * Loop over the remaining runs, using @count as a capping value, and
0912      * free them.
0913      */
0914     for (; rl->length && count != 0; ++rl) {
0915         if (unlikely(rl->lcn < LCN_HOLE)) {
0916             VCN vcn;
0917 
0918             /* Attempt to map runlist. */
0919             vcn = rl->vcn;
0920             rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
0921             if (IS_ERR(rl)) {
0922                 err = PTR_ERR(rl);
0923                 if (!is_rollback)
0924                     ntfs_error(vol->sb, "Failed to map "
0925                             "runlist fragment or "
0926                             "failed to find "
0927                             "subsequent runlist "
0928                             "element.");
0929                 goto err_out;
0930             }
0931             if (unlikely(rl->lcn < LCN_HOLE)) {
0932                 if (!is_rollback)
0933                     ntfs_error(vol->sb, "Runlist element "
0934                             "has invalid lcn "
0935                             "(0x%llx).",
0936                             (unsigned long long)
0937                             rl->lcn);
0938                 err = -EIO;
0939                 goto err_out;
0940             }
0941         }
0942         /* The number of clusters in this run that need freeing. */
0943         to_free = rl->length;
0944         if (count >= 0 && to_free > count)
0945             to_free = count;
0946 
0947         if (likely(rl->lcn >= 0)) {
0948             /* Do the actual freeing of the clusters in the run. */
0949             err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn,
0950                     to_free, likely(!is_rollback) ? 0 : 1);
0951             if (unlikely(err)) {
0952                 if (!is_rollback)
0953                     ntfs_error(vol->sb, "Failed to clear "
0954                             "subsequent run.");
0955                 goto err_out;
0956             }
0957             /* We have freed @to_free real clusters. */
0958             real_freed += to_free;
0959         }
0960         /* Adjust the number of clusters left to free. */
0961         if (count >= 0)
0962             count -= to_free;
0963     
0964         /* Update the total done clusters. */
0965         total_freed += to_free;
0966     }
0967     if (likely(!is_rollback))
0968         up_write(&vol->lcnbmp_lock);
0969 
0970     BUG_ON(count > 0);
0971 
0972     /* We are done.  Return the number of actually freed clusters. */
0973     ntfs_debug("Done.");
0974     return real_freed;
0975 err_out:
0976     if (is_rollback)
0977         return err;
0978     /* If no real clusters were freed, no need to rollback. */
0979     if (!real_freed) {
0980         up_write(&vol->lcnbmp_lock);
0981         return err;
0982     }
0983     /*
0984      * Attempt to rollback and if that succeeds just return the error code.
0985      * If rollback fails, set the volume errors flag, emit an error
0986      * message, and return the error code.
0987      */
0988     delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
0989     if (delta < 0) {
0990         ntfs_error(vol->sb, "Failed to rollback (error %i).  Leaving "
0991                 "inconsistent metadata!  Unmount and run "
0992                 "chkdsk.", (int)delta);
0993         NVolSetErrors(vol);
0994     }
0995     up_write(&vol->lcnbmp_lock);
0996     ntfs_error(vol->sb, "Aborting (error %i).", err);
0997     return err;
0998 }
0999 
1000 #endif /* NTFS_RW */