Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /**
0003  * compress.c - NTFS kernel compressed attributes handling.
0004  *      Part of the Linux-NTFS project.
0005  *
0006  * Copyright (c) 2001-2004 Anton Altaparmakov
0007  * Copyright (c) 2002 Richard Russon
0008  */
0009 
0010 #include <linux/fs.h>
0011 #include <linux/buffer_head.h>
0012 #include <linux/blkdev.h>
0013 #include <linux/vmalloc.h>
0014 #include <linux/slab.h>
0015 
0016 #include "attrib.h"
0017 #include "inode.h"
0018 #include "debug.h"
0019 #include "ntfs.h"
0020 
0021 /**
0022  * ntfs_compression_constants - enum of constants used in the compression code
0023  */
0024 typedef enum {
0025     /* Token types and access mask (applied to the low bit of a tag byte). */
0026     NTFS_SYMBOL_TOKEN   =   0, /* Tag bit clear: one literal byte follows. */
0027     NTFS_PHRASE_TOKEN   =   1, /* Tag bit set: 16-bit back-reference follows. */
0028     NTFS_TOKEN_MASK     =   1, /* Selects the current token's bit in the tag. */
0029 
0030     /* Compression sub-block constants (16-bit sub-block header fields). */
0031     NTFS_SB_SIZE_MASK   =   0x0fff, /* Header length field; sb spans field + 3 bytes. */
0032     NTFS_SB_SIZE        =   0x1000, /* Bytes of uncompressed data per sub-block. */
0033     NTFS_SB_IS_COMPRESSED   =   0x8000, /* Header flag: sub-block data is compressed. */
0034 
0035     /*
0036      * The maximum compression block size is by definition 16 * the cluster
0037      * size, with the maximum supported cluster size being 4kiB. Thus the
0038      * maximum compression buffer size is 64kiB, so we use this when
0039      * allocating the compression buffer.
0040      */
0041     NTFS_MAX_CB_SIZE    = 64 * 1024,
0042 } ntfs_compression_constants;
0043 
0044 /**
0045  * ntfs_compression_buffer - the one NTFS_MAX_CB_SIZE byte decompression buffer
0046  */
0047 static u8 *ntfs_compression_buffer; /* vmalloc()ed in allocate_compression_buffers(). */
0048 
0049 /**
0050  * ntfs_cb_lock - spinlock protecting ntfs_compression_buffer (no sleeping while held)
0051  */
0052 static DEFINE_SPINLOCK(ntfs_cb_lock);
0053 
0054 /**
0055  * allocate_compression_buffers - allocate the decompression buffer
0056  *
0057  * Caller has to hold the ntfs_lock mutex.
0058  *
0059  * Return 0 on success or -ENOMEM if the allocation failed.
0060  */
0061 int allocate_compression_buffers(void)
0062 {
0063     BUG_ON(ntfs_compression_buffer); /* Allocating a second time is a bug. */
0064 
0065     ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE);
0066     if (!ntfs_compression_buffer)
0067         return -ENOMEM;
0068     return 0;
0069 }
0070 
0071 /**
0072  * free_compression_buffers - free the decompression buffer
0073  *
0074  * Caller has to hold the ntfs_lock mutex.
0075  */
0076 void free_compression_buffers(void)
0077 {
0078     BUG_ON(!ntfs_compression_buffer); /* Freeing without allocating is a bug. */
0079     vfree(ntfs_compression_buffer);
0080     ntfs_compression_buffer = NULL; /* So allocate_compression_buffers() may run again. */
0081 }
0082 
0083 /**
0084  * zero_partial_compressed_page - zero the part of @page at/after @initialized_size
0085  */
0086 static void zero_partial_compressed_page(struct page *page,
0087         const s64 initialized_size)
0088 {
0089     u8 *kp = page_address(page);
0090     unsigned int kp_ofs;
0091 
0092     ntfs_debug("Zeroing page region outside initialized size.");
0093     if (((s64)page->index << PAGE_SHIFT) >= initialized_size) {
0094         clear_page(kp); /* Page starts at or after initialized_size: zero all of it. */
0095         return;
0096     }
0097     kp_ofs = initialized_size & ~PAGE_MASK; /* Offset of initialized_size in the page. */
0098     memset(kp + kp_ofs, 0, PAGE_SIZE - kp_ofs); /* Zero from there to the page end. */
0099     return;
0100 }
0101 
0102 /**
0103  * handle_bounds_compressed_page - test for & handle out of bounds compressed page
0104  */
0105 static inline void handle_bounds_compressed_page(struct page *page,
0106         const loff_t i_size, const s64 initialized_size)
0107 {
0108     if ((page->index >= (initialized_size >> PAGE_SHIFT)) &&
0109             (initialized_size < i_size))
0110         zero_partial_compressed_page(page, initialized_size); /* Page holds or follows initialized_size. */
0111     return;
0112 }
0113 
0114 /**
0115  * ntfs_decompress - decompress a compression block into an array of pages
0116  * @dest_pages:     destination array of pages
0117  * @completed_pages:    scratch space to track completed pages
0118  * @dest_index:     current index into @dest_pages (IN/OUT)
0119  * @dest_ofs:       current offset within @dest_pages[@dest_index] (IN/OUT)
0120  * @dest_max_index: maximum index into @dest_pages (IN)
0121  * @dest_max_ofs:   maximum offset within @dest_pages[@dest_max_index] (IN)
0122  * @xpage:      index in @dest_pages of the target page (-1 if none) (IN)
0123  * @xpage_done:     set to 1 if xpage was completed successfully (IN/OUT)
0124  * @cb_start:       compression block to decompress (IN)
0125  * @cb_size:        size of compression block @cb_start in bytes (IN)
0126  * @i_size:     file size when we started the read (IN)
0127  * @initialized_size:   initialized file size when we started the read (IN)
0128  *
0129  * The caller must hold the ntfs_cb_lock spinlock. ntfs_decompress() drops it
0130  * when the critical section is finished, on success and on error alike.
0131  *
0132  * This decompresses the compression block @cb_start into the array of
0133  * destination pages @dest_pages starting at index @dest_index into @dest_pages
0134  * and at offset @dest_ofs into the page @dest_pages[@dest_index].
0135  *
0136  * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1.
0137  * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified.
0138  *
0139  * @cb_start is a pointer to the compression block which needs decompressing
0140  * and @cb_size is the size of @cb_start in bytes (8-64kiB).
0141  *
0142  * Return 0 on success or -EOVERFLOW on error in the compressed stream.
0143  * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
0144  * completed during the decompression of the compression block (@cb_start).
0145  *
0146  * Warning: This function *REQUIRES* PAGE_SIZE >= 4096 or it will blow up
0147  * unpredictably! You have been warned!
0148  *
0149  * Note to hackers: This function may not sleep until it has finished accessing
0150  * the compression block @cb_start as ntfs_cb_lock is held until then.
0151  */
0152 static int ntfs_decompress(struct page *dest_pages[], int completed_pages[],
0153         int *dest_index, int *dest_ofs, const int dest_max_index,
0154         const int dest_max_ofs, const int xpage, char *xpage_done,
0155         u8 *const cb_start, const u32 cb_size, const loff_t i_size,
0156         const s64 initialized_size)
0157 {
0158     /*
0159      * Pointers into the compressed data, i.e. the compression block (cb),
0160      * and the therein contained sub-blocks (sb).
0161      */
0162     u8 *cb_end = cb_start + cb_size; /* End of cb. */
0163     u8 *cb = cb_start;  /* Current position in cb. */
0164     u8 *cb_sb_start = cb;   /* Beginning of the current sb in the cb. */
0165     u8 *cb_sb_end;      /* End of current sb / beginning of next sb. */
0166 
0167     /* Variables for uncompressed data / destination. */
0168     struct page *dp;    /* Current destination page being worked on. */
0169     u8 *dp_addr;        /* Current pointer into dp. */
0170     u8 *dp_sb_start;    /* Start of current sub-block in dp. */
0171     u8 *dp_sb_end;      /* End of current sb in dp (dp_sb_start +
0172                    NTFS_SB_SIZE). */
0173     u16 do_sb_start;    /* @dest_ofs when starting this sub-block. */
0174     u16 do_sb_end;      /* @dest_ofs of end of this sb (do_sb_start +
0175                    NTFS_SB_SIZE). */
0176 
0177     /* Variables for tag and token parsing. */
0178     u8 tag;         /* Current tag. */
0179     int token;      /* Loop counter for the eight tokens in tag. */
0180     int nr_completed_pages = 0; /* Number of entries used in @completed_pages. */
0181 
0182     /* Default error code. */
0183     int err = -EOVERFLOW;
0184 
0185     ntfs_debug("Entering, cb_size = 0x%x.", cb_size);
0186 do_next_sb:
0187     ntfs_debug("Beginning sub-block at offset = 0x%zx in the cb.",
0188             cb - cb_start);
0189     /*
0190      * Have we reached the end of the compression block or the end of the
0191      * decompressed data?  The latter can happen for example if the current
0192      * position in the compression block is one byte before its end so the
0193      * first two checks do not detect it.
0194      */
0195     if (cb == cb_end || !le16_to_cpup((le16*)cb) ||
0196             (*dest_index == dest_max_index &&
0197             *dest_ofs == dest_max_ofs)) {
0198         int i;
0199 
0200         ntfs_debug("Completed. Returning success (0).");
0201         err = 0;
0202 return_error:
0203         /* We can sleep from now on, so we drop ntfs_cb_lock. */
0204         spin_unlock(&ntfs_cb_lock);
0205         /* Second stage: finalize completed pages (also done on error). */
0206         if (nr_completed_pages > 0) {
0207             for (i = 0; i < nr_completed_pages; i++) {
0208                 int di = completed_pages[i];
0209 
0210                 dp = dest_pages[di];
0211                 /*
0212                  * If we are outside the initialized size, zero
0213                  * the out of bounds page range.
0214                  */
0215                 handle_bounds_compressed_page(dp, i_size,
0216                         initialized_size);
0217                 flush_dcache_page(dp);
0218                 kunmap(dp);
0219                 SetPageUptodate(dp);
0220                 unlock_page(dp);
0221                 if (di == xpage)
0222                     *xpage_done = 1;
0223                 else
0224                     put_page(dp);
0225                 dest_pages[di] = NULL; /* Page is finished with here. */
0226             }
0227         }
0228         return err;
0229     }
0230 
0231     /* Setup offsets for the current sub-block destination. */
0232     do_sb_start = *dest_ofs;
0233     do_sb_end = do_sb_start + NTFS_SB_SIZE;
0234 
0235     /* Check that we are still within allowed boundaries. */
0236     if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs)
0237         goto return_overflow;
0238 
0239     /* Does the minimum size of a compressed sb overflow valid range? */
0240     if (cb + 6 > cb_end)
0241         goto return_overflow;
0242 
0243     /* Setup the current sub-block source pointers and validate range. */
0244     cb_sb_start = cb;
0245     cb_sb_end = cb_sb_start + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK)
0246             + 3;
0247     if (cb_sb_end > cb_end)
0248         goto return_overflow;
0249 
0250     /* Get the current destination page. */
0251     dp = dest_pages[*dest_index];
0252     if (!dp) {
0253         /* No page present. Skip decompression of this sub-block. */
0254         cb = cb_sb_end;
0255 
0256         /* Advance destination position to next sub-block. */
0257         *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_MASK;
0258         if (!*dest_ofs && (++*dest_index > dest_max_index))
0259             goto return_overflow;
0260         goto do_next_sb;
0261     }
0262 
0263     /* We have a valid destination page. Setup the destination pointers. */
0264     dp_addr = (u8*)page_address(dp) + do_sb_start;
0265 
0266     /* Now, we are ready to process the current sub-block (sb). */
0267     if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) {
0268         ntfs_debug("Found uncompressed sub-block.");
0269         /* This sb is not compressed, just copy it into destination. */
0270 
0271         /* Advance source position past the 2-byte header to the data. */
0272         cb += 2;
0273 
0274         /* An uncompressed sb must be full size. */
0275         if (cb_sb_end - cb != NTFS_SB_SIZE)
0276             goto return_overflow;
0277 
0278         /* Copy the block and advance the source position. */
0279         memcpy(dp_addr, cb, NTFS_SB_SIZE);
0280         cb += NTFS_SB_SIZE;
0281 
0282         /* Advance destination position to next sub-block. */
0283         *dest_ofs += NTFS_SB_SIZE;
0284         if (!(*dest_ofs &= ~PAGE_MASK)) {
0285 finalize_page:
0286             /*
0287              * First stage: add current page index to array of
0288              * completed pages.
0289              */
0290             completed_pages[nr_completed_pages++] = *dest_index;
0291             if (++*dest_index > dest_max_index)
0292                 goto return_overflow;
0293         }
0294         goto do_next_sb;
0295     }
0296     ntfs_debug("Found compressed sub-block.");
0297     /* This sb is compressed, decompress it into destination. */
0298 
0299     /* Setup destination pointers. */
0300     dp_sb_start = dp_addr;
0301     dp_sb_end = dp_sb_start + NTFS_SB_SIZE;
0302 
0303     /* Forward past the 2-byte header to the first tag in the sub-block. */
0304     cb += 2;
0305 do_next_tag:
0306     if (cb == cb_sb_end) {
0307         /* Check if the decompressed sub-block was not full-length. */
0308         if (dp_addr < dp_sb_end) {
0309             int nr_bytes = do_sb_end - *dest_ofs;
0310 
0311             ntfs_debug("Filling incomplete sub-block with "
0312                     "zeroes.");
0313             /* Zero remainder and update destination position. */
0314             memset(dp_addr, 0, nr_bytes);
0315             *dest_ofs += nr_bytes;
0316         }
0317         /* We have finished the current sub-block. */
0318         if (!(*dest_ofs &= ~PAGE_MASK))
0319             goto finalize_page;
0320         goto do_next_sb;
0321     }
0322 
0323     /* Check we are still in range. */
0324     if (cb > cb_sb_end || dp_addr > dp_sb_end)
0325         goto return_overflow;
0326 
0327     /* Get the next tag and advance to first token. */
0328     tag = *cb++;
0329 
0330     /* Parse the eight tokens described by the tag, lowest bit first. */
0331     for (token = 0; token < 8; token++, tag >>= 1) {
0332         u16 lg, pt, length, max_non_overlap;
0333         register u16 i;
0334         u8 *dp_back_addr;
0335 
0336         /* Done / out of range? NOTE(review): dp_addr == dp_sb_end still passes, so a symbol store below can land at dp_sb_end - confirm. */
0337         if (cb >= cb_sb_end || dp_addr > dp_sb_end)
0338             break;
0339 
0340         /* Determine token type and parse appropriately.*/
0341         if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
0342             /*
0343              * We have a symbol token, copy the symbol across, and
0344              * advance the source and destination positions.
0345              */
0346             *dp_addr++ = *cb++;
0347             ++*dest_ofs;
0348 
0349             /* Continue with the next token. */
0350             continue;
0351         }
0352 
0353         /*
0354          * We have a phrase token. Make sure it is not the first token in
0355          * the sb as this is illegal and would confuse the code below.
0356          */
0357         if (dp_addr == dp_sb_start)
0358             goto return_overflow;
0359 
0360         /*
0361          * Determine the number of bytes to go back (p) and the number
0362          * of bytes to copy (l). We use an optimized algorithm in which
0363          * we first calculate log2(current destination position in sb),
0364          * which allows determination of l and p in O(1) rather than
0365          * O(n). We just need an arch-optimized log2() function now.
0366          */
0367         lg = 0;
0368         for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1)
0369             lg++;
0370 
0371         /* Get the phrase token into pt. */
0372         pt = le16_to_cpup((le16*)cb);
0373 
0374         /*
0375          * Calculate starting position of the byte sequence in
0376          * the destination using the fact that p = (pt >> (12 - lg)) + 1
0377          * and make sure we don't go too far back.
0378          */
0379         dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1;
0380         if (dp_back_addr < dp_sb_start)
0381             goto return_overflow;
0382 
0383         /* Now calculate the length of the byte sequence. */
0384         length = (pt & (0xfff >> lg)) + 3;
0385 
0386         /* Advance destination position and verify it is in range. */
0387         *dest_ofs += length;
0388         if (*dest_ofs > do_sb_end)
0389             goto return_overflow;
0390 
0391         /* The number of non-overlapping bytes. */
0392         max_non_overlap = dp_addr - dp_back_addr;
0393 
0394         if (length <= max_non_overlap) {
0395             /* The byte sequence doesn't overlap, just copy it. */
0396             memcpy(dp_addr, dp_back_addr, length);
0397 
0398             /* Advance destination pointer. */
0399             dp_addr += length;
0400         } else {
0401             /*
0402              * The byte sequence does overlap, copy non-overlapping
0403              * part and then do a slow byte by byte copy for the
0404              * overlapping part. Also, advance the destination
0405              * pointer.
0406              */
0407             memcpy(dp_addr, dp_back_addr, max_non_overlap);
0408             dp_addr += max_non_overlap;
0409             dp_back_addr += max_non_overlap;
0410             length -= max_non_overlap;
0411             while (length--)
0412                 *dp_addr++ = *dp_back_addr++;
0413         }
0414 
0415         /* Advance source position and continue with the next token. */
0416         cb += 2;
0417     }
0418 
0419     /* No tokens left in the current tag. Continue with the next tag. */
0420     goto do_next_tag;
0421 
0422 return_overflow:
0423     ntfs_error(NULL, "Failed. Returning -EOVERFLOW.");
0424     goto return_error; /* err is still -EOVERFLOW; unlock and finalize there. */
0425 }
0426 
0427 /**
0428  * ntfs_read_compressed_block - read a compressed block into the page cache
0429  * @page:   locked page in the compression block(s) we need to read
0430  *
0431  * When we are called the page has already been verified to be locked and the
0432  * attribute is known to be non-resident, not encrypted, but compressed.
0433  *
0434  * 1. Determine which compression block(s) @page is in.
0435  * 2. Get hold of all pages corresponding to this/these compression block(s).
0436  * 3. Read the (first) compression block.
0437  * 4. Decompress it into the corresponding pages.
0438  * 5. Throw the compressed data away and proceed to 3. for the next compression
0439  *    block or return success if no more compression blocks left.
0440  *
0441  * Warning: We have to be careful what we do about existing pages. They might
0442  * have been written to so that we would lose data if we were to just overwrite
0443  * them with the out-of-date uncompressed data.
0444  *
0445  * FIXME: For PAGE_SIZE > cb_size we are not doing the Right Thing(TM) at
0446  * the end of the file I think. We need to detect this case and zero the out
0447  * of bounds remainder of the page in question and mark it as handled. At the
0448  * moment we would just return -EIO on such a page. This bug will only become
0449  * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte
0450  * clusters so is probably not going to be seen by anyone. Still this should
0451  * be fixed. (AIA)
0452  *
0453  * FIXME: Again for PAGE_SIZE > cb_size we are screwing up both in
0454  * handling sparse and compressed cbs. (AIA)
0455  *
0456  * FIXME: At the moment we don't do any zeroing out in the case that
0457  * initialized_size is less than data_size. This should be safe because of the
0458  * nature of the compression algorithm used. Just in case we check and output
0459  * an error message in read inode if the two sizes are not equal for a
0460  * compressed file. (AIA)
0461  */
0462 int ntfs_read_compressed_block(struct page *page)
0463 {
0464     loff_t i_size;
0465     s64 initialized_size;
0466     struct address_space *mapping = page->mapping;
0467     ntfs_inode *ni = NTFS_I(mapping->host);
0468     ntfs_volume *vol = ni->vol;
0469     struct super_block *sb = vol->sb;
0470     runlist_element *rl;
0471     unsigned long flags, block_size = sb->s_blocksize;
0472     unsigned char block_size_bits = sb->s_blocksize_bits;
0473     u8 *cb, *cb_pos, *cb_end;
0474     struct buffer_head **bhs;
0475     unsigned long offset, index = page->index;
0476     u32 cb_size = ni->itype.compressed.block_size;
0477     u64 cb_size_mask = cb_size - 1UL;
0478     VCN vcn;
0479     LCN lcn;
0480     /* The first wanted vcn (minimum alignment is PAGE_SIZE). */
0481     VCN start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >>
0482             vol->cluster_size_bits;
0483     /*
0484      * The first vcn after the last wanted vcn (minimum alignment is again
0485      * PAGE_SIZE.
0486      */
0487     VCN end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1)
0488             & ~cb_size_mask) >> vol->cluster_size_bits;
0489     /* Number of compression blocks (cbs) in the wanted vcn range. */
0490     unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits
0491             >> ni->itype.compressed.block_size_bits;
0492     /*
0493      * Number of pages required to store the uncompressed data from all
0494      * compression blocks (cbs) overlapping @page. Due to alignment
0495      * guarantees of start_vcn and end_vcn, no need to round up here.
0496      */
0497     unsigned int nr_pages = (end_vcn - start_vcn) <<
0498             vol->cluster_size_bits >> PAGE_SHIFT;
0499     unsigned int xpage, max_page, cur_page, cur_ofs, i;
0500     unsigned int cb_clusters, cb_max_ofs;
0501     int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0;
0502     struct page **pages;
0503     int *completed_pages;
0504     unsigned char xpage_done = 0;
0505 
0506     ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = "
0507             "%i.", index, cb_size, nr_pages);
0508     /*
0509      * Bad things happen if we get here for anything that is not an
0510      * unnamed $DATA attribute.
0511      */
0512     BUG_ON(ni->type != AT_DATA);
0513     BUG_ON(ni->name_len);
0514 
0515     pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
0516     completed_pages = kmalloc_array(nr_pages + 1, sizeof(int), GFP_NOFS);
0517 
0518     /* Allocate memory to store the buffer heads we need. */
0519     bhs_size = cb_size / block_size * sizeof(struct buffer_head *);
0520     bhs = kmalloc(bhs_size, GFP_NOFS);
0521 
0522     if (unlikely(!pages || !bhs || !completed_pages)) {
0523         kfree(bhs);
0524         kfree(pages);
0525         kfree(completed_pages);
0526         unlock_page(page);
0527         ntfs_error(vol->sb, "Failed to allocate internal buffers.");
0528         return -ENOMEM;
0529     }
0530 
0531     /*
0532      * We have already been given one page, this is the one we must do.
0533      * Once again, the alignment guarantees keep it simple.
0534      */
0535     offset = start_vcn << vol->cluster_size_bits >> PAGE_SHIFT;
0536     xpage = index - offset;
0537     pages[xpage] = page;
0538     /*
0539      * The remaining pages need to be allocated and inserted into the page
0540      * cache, alignment guarantees keep all the below much simpler. (-8
0541      */
0542     read_lock_irqsave(&ni->size_lock, flags);
0543     i_size = i_size_read(VFS_I(ni));
0544     initialized_size = ni->initialized_size;
0545     read_unlock_irqrestore(&ni->size_lock, flags);
0546     max_page = ((i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) -
0547             offset;
0548     /* Is the page fully outside i_size? (truncate in progress) */
0549     if (xpage >= max_page) {
0550         kfree(bhs);
0551         kfree(pages);
0552         kfree(completed_pages);
0553         zero_user(page, 0, PAGE_SIZE);
0554         ntfs_debug("Compressed read outside i_size - truncated?");
0555         SetPageUptodate(page);
0556         unlock_page(page);
0557         return 0;
0558     }
0559     if (nr_pages < max_page)
0560         max_page = nr_pages;
0561     for (i = 0; i < max_page; i++, offset++) {
0562         if (i != xpage)
0563             pages[i] = grab_cache_page_nowait(mapping, offset);
0564         page = pages[i];
0565         if (page) {
0566             /*
0567              * We only (re)read the page if it isn't already read
0568              * in and/or dirty or we would be losing data or at
0569              * least wasting our time.
0570              */
0571             if (!PageDirty(page) && (!PageUptodate(page) ||
0572                     PageError(page))) {
0573                 ClearPageError(page);
0574                 kmap(page);
0575                 continue;
0576             }
0577             unlock_page(page);
0578             put_page(page);
0579             pages[i] = NULL;
0580         }
0581     }
0582 
0583     /*
0584      * We have the runlist, and all the destination pages we need to fill.
0585      * Now read the first compression block.
0586      */
0587     cur_page = 0;
0588     cur_ofs = 0;
0589     cb_clusters = ni->itype.compressed.block_clusters;
0590 do_next_cb:
0591     nr_cbs--;
0592     nr_bhs = 0;
0593 
0594     /* Read all cb buffer heads one cluster at a time. */
0595     rl = NULL;
0596     for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn;
0597             vcn++) {
0598         bool is_retry = false;
0599 
0600         if (!rl) {
0601 lock_retry_remap:
0602             down_read(&ni->runlist.lock);
0603             rl = ni->runlist.rl;
0604         }
0605         if (likely(rl != NULL)) {
0606             /* Seek to element containing target vcn. */
0607             while (rl->length && rl[1].vcn <= vcn)
0608                 rl++;
0609             lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
0610         } else
0611             lcn = LCN_RL_NOT_MAPPED;
0612         ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
0613                 (unsigned long long)vcn,
0614                 (unsigned long long)lcn);
0615         if (lcn < 0) {
0616             /*
0617              * When we reach the first sparse cluster we have
0618              * finished with the cb.
0619              */
0620             if (lcn == LCN_HOLE)
0621                 break;
0622             if (is_retry || lcn != LCN_RL_NOT_MAPPED)
0623                 goto rl_err;
0624             is_retry = true;
0625             /*
0626              * Attempt to map runlist, dropping lock for the
0627              * duration.
0628              */
0629             up_read(&ni->runlist.lock);
0630             if (!ntfs_map_runlist(ni, vcn))
0631                 goto lock_retry_remap;
0632             goto map_rl_err;
0633         }
0634         block = lcn << vol->cluster_size_bits >> block_size_bits;
0635         /* Read the lcn from device in chunks of block_size bytes. */
0636         max_block = block + (vol->cluster_size >> block_size_bits);
0637         do {
0638             ntfs_debug("block = 0x%x.", block);
0639             if (unlikely(!(bhs[nr_bhs] = sb_getblk(sb, block))))
0640                 goto getblk_err;
0641             nr_bhs++;
0642         } while (++block < max_block);
0643     }
0644 
0645     /* Release the lock if we took it. */
0646     if (rl)
0647         up_read(&ni->runlist.lock);
0648 
0649     /* Setup and initiate io on all buffer heads. */
0650     for (i = 0; i < nr_bhs; i++) {
0651         struct buffer_head *tbh = bhs[i];
0652 
0653         if (!trylock_buffer(tbh))
0654             continue;
0655         if (unlikely(buffer_uptodate(tbh))) {
0656             unlock_buffer(tbh);
0657             continue;
0658         }
0659         get_bh(tbh);
0660         tbh->b_end_io = end_buffer_read_sync;
0661         submit_bh(REQ_OP_READ, tbh);
0662     }
0663 
0664     /* Wait for io completion on all buffer heads. */
0665     for (i = 0; i < nr_bhs; i++) {
0666         struct buffer_head *tbh = bhs[i];
0667 
0668         if (buffer_uptodate(tbh))
0669             continue;
0670         wait_on_buffer(tbh);
0671         /*
0672          * We need an optimization barrier here, otherwise we start
0673          * hitting the below fixup code when accessing a loopback
0674          * mounted ntfs partition. This indicates either there is a
0675          * race condition in the loop driver or, more likely, gcc
0676          * overoptimises the code without the barrier and it doesn't
0677          * do the Right Thing(TM).
0678          */
0679         barrier();
0680         if (unlikely(!buffer_uptodate(tbh))) {
0681             ntfs_warning(vol->sb, "Buffer is unlocked but not "
0682                     "uptodate! Unplugging the disk queue "
0683                     "and rescheduling.");
0684             get_bh(tbh);
0685             io_schedule();
0686             put_bh(tbh);
0687             if (unlikely(!buffer_uptodate(tbh)))
0688                 goto read_err;
0689             ntfs_warning(vol->sb, "Buffer is now uptodate. Good.");
0690         }
0691     }
0692 
0693     /*
0694      * Get the compression buffer. We must not sleep any more
0695      * until we are finished with it.
0696      */
0697     spin_lock(&ntfs_cb_lock);
0698     cb = ntfs_compression_buffer;
0699 
0700     BUG_ON(!cb);
0701 
0702     cb_pos = cb;
0703     cb_end = cb + cb_size;
0704 
0705     /* Copy the buffer heads into the contiguous buffer. */
0706     for (i = 0; i < nr_bhs; i++) {
0707         memcpy(cb_pos, bhs[i]->b_data, block_size);
0708         cb_pos += block_size;
0709     }
0710 
0711     /* Just a precaution. */
0712     if (cb_pos + 2 <= cb + cb_size)
0713         *(u16*)cb_pos = 0;
0714 
0715     /* Reset cb_pos back to the beginning. */
0716     cb_pos = cb;
0717 
0718     /* We now have both source (if present) and destination. */
0719     ntfs_debug("Successfully read the compression block.");
0720 
0721     /* The last page and maximum offset within it for the current cb. */
0722     cb_max_page = (cur_page << PAGE_SHIFT) + cur_ofs + cb_size;
0723     cb_max_ofs = cb_max_page & ~PAGE_MASK;
0724     cb_max_page >>= PAGE_SHIFT;
0725 
0726     /* Catch end of file inside a compression block. */
0727     if (cb_max_page > max_page)
0728         cb_max_page = max_page;
0729 
0730     if (vcn == start_vcn - cb_clusters) {
0731         /* Sparse cb, zero out page range overlapping the cb. */
0732         ntfs_debug("Found sparse compression block.");
0733         /* We can sleep from now on, so we drop lock. */
0734         spin_unlock(&ntfs_cb_lock);
0735         if (cb_max_ofs)
0736             cb_max_page--;
0737         for (; cur_page < cb_max_page; cur_page++) {
0738             page = pages[cur_page];
0739             if (page) {
0740                 if (likely(!cur_ofs))
0741                     clear_page(page_address(page));
0742                 else
0743                     memset(page_address(page) + cur_ofs, 0,
0744                             PAGE_SIZE -
0745                             cur_ofs);
0746                 flush_dcache_page(page);
0747                 kunmap(page);
0748                 SetPageUptodate(page);
0749                 unlock_page(page);
0750                 if (cur_page == xpage)
0751                     xpage_done = 1;
0752                 else
0753                     put_page(page);
0754                 pages[cur_page] = NULL;
0755             }
0756             cb_pos += PAGE_SIZE - cur_ofs;
0757             cur_ofs = 0;
0758             if (cb_pos >= cb_end)
0759                 break;
0760         }
0761         /* If we have a partial final page, deal with it now. */
0762         if (cb_max_ofs && cb_pos < cb_end) {
0763             page = pages[cur_page];
0764             if (page)
0765                 memset(page_address(page) + cur_ofs, 0,
0766                         cb_max_ofs - cur_ofs);
0767             /*
0768              * No need to update cb_pos at this stage:
0769              *  cb_pos += cb_max_ofs - cur_ofs;
0770              */
0771             cur_ofs = cb_max_ofs;
0772         }
0773     } else if (vcn == start_vcn) {
0774         /* We can't sleep so we need two stages. */
0775         unsigned int cur2_page = cur_page;
0776         unsigned int cur_ofs2 = cur_ofs;
0777         u8 *cb_pos2 = cb_pos;
0778 
0779         ntfs_debug("Found uncompressed compression block.");
0780         /* Uncompressed cb, copy it to the destination pages. */
0781         /*
0782          * TODO: As a big optimization, we could detect this case
0783          * before we read all the pages and use block_read_full_folio()
0784          * on all full pages instead (we still have to treat partial
0785          * pages especially but at least we are getting rid of the
0786          * synchronous io for the majority of pages.
0787          * Or if we choose not to do the read-ahead/-behind stuff, we
0788          * could just return block_read_full_folio(pages[xpage]) as long
0789          * as PAGE_SIZE <= cb_size.
0790          */
0791         if (cb_max_ofs)
0792             cb_max_page--;
0793         /* First stage: copy data into destination pages. */
0794         for (; cur_page < cb_max_page; cur_page++) {
0795             page = pages[cur_page];
0796             if (page)
0797                 memcpy(page_address(page) + cur_ofs, cb_pos,
0798                         PAGE_SIZE - cur_ofs);
0799             cb_pos += PAGE_SIZE - cur_ofs;
0800             cur_ofs = 0;
0801             if (cb_pos >= cb_end)
0802                 break;
0803         }
0804         /* If we have a partial final page, deal with it now. */
0805         if (cb_max_ofs && cb_pos < cb_end) {
0806             page = pages[cur_page];
0807             if (page)
0808                 memcpy(page_address(page) + cur_ofs, cb_pos,
0809                         cb_max_ofs - cur_ofs);
0810             cb_pos += cb_max_ofs - cur_ofs;
0811             cur_ofs = cb_max_ofs;
0812         }
0813         /* We can sleep from now on, so drop lock. */
0814         spin_unlock(&ntfs_cb_lock);
0815         /* Second stage: finalize pages. */
0816         for (; cur2_page < cb_max_page; cur2_page++) {
0817             page = pages[cur2_page];
0818             if (page) {
0819                 /*
0820                  * If we are outside the initialized size, zero
0821                  * the out of bounds page range.
0822                  */
0823                 handle_bounds_compressed_page(page, i_size,
0824                         initialized_size);
0825                 flush_dcache_page(page);
0826                 kunmap(page);
0827                 SetPageUptodate(page);
0828                 unlock_page(page);
0829                 if (cur2_page == xpage)
0830                     xpage_done = 1;
0831                 else
0832                     put_page(page);
0833                 pages[cur2_page] = NULL;
0834             }
0835             cb_pos2 += PAGE_SIZE - cur_ofs2;
0836             cur_ofs2 = 0;
0837             if (cb_pos2 >= cb_end)
0838                 break;
0839         }
0840     } else {
0841         /* Compressed cb, decompress it into the destination page(s). */
0842         unsigned int prev_cur_page = cur_page;
0843 
0844         ntfs_debug("Found compressed compression block.");
0845         err = ntfs_decompress(pages, completed_pages, &cur_page,
0846                 &cur_ofs, cb_max_page, cb_max_ofs, xpage,
0847                 &xpage_done, cb_pos, cb_size - (cb_pos - cb),
0848                 i_size, initialized_size);
0849         /*
0850          * We can sleep from now on, lock already dropped by
0851          * ntfs_decompress().
0852          */
0853         if (err) {
0854             ntfs_error(vol->sb, "ntfs_decompress() failed in inode "
0855                     "0x%lx with error code %i. Skipping "
0856                     "this compression block.",
0857                     ni->mft_no, -err);
0858             /* Release the unfinished pages. */
0859             for (; prev_cur_page < cur_page; prev_cur_page++) {
0860                 page = pages[prev_cur_page];
0861                 if (page) {
0862                     flush_dcache_page(page);
0863                     kunmap(page);
0864                     unlock_page(page);
0865                     if (prev_cur_page != xpage)
0866                         put_page(page);
0867                     pages[prev_cur_page] = NULL;
0868                 }
0869             }
0870         }
0871     }
0872 
0873     /* Release the buffer heads. */
0874     for (i = 0; i < nr_bhs; i++)
0875         brelse(bhs[i]);
0876 
0877     /* Do we have more work to do? */
0878     if (nr_cbs)
0879         goto do_next_cb;
0880 
0881     /* We no longer need the list of buffer heads. */
0882     kfree(bhs);
0883 
0884     /* Clean up if we have any pages left. Should never happen. */
0885     for (cur_page = 0; cur_page < max_page; cur_page++) {
0886         page = pages[cur_page];
0887         if (page) {
0888             ntfs_error(vol->sb, "Still have pages left! "
0889                     "Terminating them with extreme "
0890                     "prejudice.  Inode 0x%lx, page index "
0891                     "0x%lx.", ni->mft_no, page->index);
0892             flush_dcache_page(page);
0893             kunmap(page);
0894             unlock_page(page);
0895             if (cur_page != xpage)
0896                 put_page(page);
0897             pages[cur_page] = NULL;
0898         }
0899     }
0900 
0901     /* We no longer need the list of pages. */
0902     kfree(pages);
0903     kfree(completed_pages);
0904 
0905     /* If we have completed the requested page, we return success. */
0906     if (likely(xpage_done))
0907         return 0;
0908 
0909     ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
0910             "EOVERFLOW" : (!err ? "EIO" : "unknown error"));
0911     return err < 0 ? err : -EIO;
0912 
0913 read_err:
0914     ntfs_error(vol->sb, "IO error while reading compressed data.");
0915     /* Release the buffer heads. */
0916     for (i = 0; i < nr_bhs; i++)
0917         brelse(bhs[i]);
0918     goto err_out;
0919 
0920 map_rl_err:
0921     ntfs_error(vol->sb, "ntfs_map_runlist() failed. Cannot read "
0922             "compression block.");
0923     goto err_out;
0924 
0925 rl_err:
0926     up_read(&ni->runlist.lock);
0927     ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read "
0928             "compression block.");
0929     goto err_out;
0930 
0931 getblk_err:
0932     up_read(&ni->runlist.lock);
0933     ntfs_error(vol->sb, "getblk() failed. Cannot read compression block.");
0934 
0935 err_out:
0936     kfree(bhs);
0937     for (i = cur_page; i < max_page; i++) {
0938         page = pages[i];
0939         if (page) {
0940             flush_dcache_page(page);
0941             kunmap(page);
0942             unlock_page(page);
0943             if (i != xpage)
0944                 put_page(page);
0945         }
0946     }
0947     kfree(pages);
0948     kfree(completed_pages);
0949     return -EIO;
0950 }