the-tree/lib/decompress_bunzip2.c

0001 /*  Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
0002
0003     Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
0004     which also acknowledges contributions by Mike Burrows, David Wheeler,
0005     Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
0006     Robert Sedgewick, and Jon L. Bentley.
0007
0008     This code is licensed under the LGPLv2:
0009         LGPL (http://www.gnu.org/copyleft/lgpl.html
0010 */
0011
0012 /*
0013     Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
0014
0015     More efficient reading of Huffman codes, a streamlined read_bunzip()
0016     function, and various other tweaks.  In (limited) tests, approximately
0017     20% faster than bzcat on x86 and about 10% faster on arm.
0018
0019     Note that about 2/3 of the time is spent in read_unzip() reversing
0020     the Burrows-Wheeler transformation.  Much of that time is delay
0021     resulting from cache misses.
0022
0023     I would ask that anyone benefiting from this work, especially those
0024     using it in commercial products, consider making a donation to my local
0025     non-profit hospice organization in the name of the woman I loved, who
0026     passed away Feb. 12, 2003.
0027
0028         In memory of Toni W. Hagan
0029
0030         Hospice of Acadiana, Inc.
0031         2600 Johnston St., Suite 200
0032         Lafayette, LA 70503-3240
0033
0034         Phone (337) 232-1234 or 1-800-738-2226
0035         Fax   (337) 232-1297
0036
0037         https://www.hospiceacadiana.com/
0038
0039     Manuel
0040  */
0041
0042 /*
0043     Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
0044 */
0045
0046
0047 #ifdef STATIC
0048 #define PREBOOT
0049 #else
0050 #include <linux/decompress/bunzip2.h>
0051 #endif /* STATIC */
0052
0053 #include <linux/decompress/mm.h>
0054 #include <linux/crc32poly.h>
0055
0056 #ifndef INT_MAX
0057 #define INT_MAX 0x7fffffff
0058 #endif
0059
0060 /* Constants for Huffman coding */
0061 #define MAX_GROUPS      6
0062 #define GROUP_SIZE          50  /* 64 would have been more efficient */
0063 #define MAX_HUFCODE_BITS    20  /* Longest Huffman code allowed */
0064 #define MAX_SYMBOLS         258 /* 256 literals + RUNA + RUNB */
0065 #define SYMBOL_RUNA     0
0066 #define SYMBOL_RUNB     1
0067
0068 /* Status return values */
0069 #define RETVAL_OK           0
0070 #define RETVAL_LAST_BLOCK       (-1)
0071 #define RETVAL_NOT_BZIP_DATA        (-2)
0072 #define RETVAL_UNEXPECTED_INPUT_EOF (-3)
0073 #define RETVAL_UNEXPECTED_OUTPUT_EOF    (-4)
0074 #define RETVAL_DATA_ERROR       (-5)
0075 #define RETVAL_OUT_OF_MEMORY        (-6)
0076 #define RETVAL_OBSOLETE_INPUT       (-7)
0077
0078 /* Other housekeeping constants */
0079 #define BZIP2_IOBUF_SIZE        4096
0080
0081 /* This is what we know about each Huffman coding group */
0082 struct group_data {
0083     /* We have an extra slot at the end of limit[] for a sentinel value. */
0084     int limit[MAX_HUFCODE_BITS+1];
0085     int base[MAX_HUFCODE_BITS];
0086     int permute[MAX_SYMBOLS];
0087     int minLen, maxLen;
0088 };
0089
0090 /* Structure holding all the housekeeping data, including IO buffers and
0091    memory that persists between calls to bunzip */
0092 struct bunzip_data {
0093     /* State for interrupting output loop */
0094     int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
0095     /* I/O tracking data (file handles, buffers, positions, etc.) */
0096     long (*fill)(void*, unsigned long);
0097     long inbufCount, inbufPos /*, outbufPos*/;
0098     unsigned char *inbuf /*,*outbuf*/;
0099     unsigned int inbufBitCount, inbufBits;
0100     /* The CRC values stored in the block header and calculated from the
0101     data */
0102     unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
0103     /* Intermediate buffer and its size (in bytes) */
0104     unsigned int *dbuf, dbufSize;
0105     /* These things are a bit too big to go on the stack */
0106     unsigned char selectors[32768];     /* nSelectors = 15 bits */
0107     struct group_data groups[MAX_GROUPS];   /* Huffman coding tables */
0108     int io_error;           /* non-zero if we have IO error */
0109     int byteCount[256];
0110     unsigned char symToByte[256], mtfSymbol[256];
0111 };
0112
0113
0114 /* Return the next nnn bits of input.  All reads from the compressed input
0115    are done through this function.  All reads are big endian */
0116 static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
0117 {
0118     unsigned int bits = 0;
0119
0120     /* If we need to get more data from the byte buffer, do so.
0121        (Loop getting one byte at a time to enforce endianness and avoid
0122        unaligned access.) */
0123     while (bd->inbufBitCount < bits_wanted) {
0124         /* If we need to read more data from file into byte buffer, do
0125            so */
0126         if (bd->inbufPos == bd->inbufCount) {
0127             if (bd->io_error)
0128                 return 0;
0129             bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
0130             if (bd->inbufCount <= 0) {
0131                 bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
0132                 return 0;
0133             }
0134             bd->inbufPos = 0;
0135         }
0136         /* Avoid 32-bit overflow (dump bit buffer to top of output) */
0137         if (bd->inbufBitCount >= 24) {
0138             bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
0139             bits_wanted -= bd->inbufBitCount;
0140             bits <<= bits_wanted;
0141             bd->inbufBitCount = 0;
0142         }
0143         /* Grab next 8 bits of input from buffer. */
0144         bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
0145         bd->inbufBitCount += 8;
0146     }
0147     /* Calculate result */
0148     bd->inbufBitCount -= bits_wanted;
0149     bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
0150
0151     return bits;
0152 }
0153
0154 /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
0155
0156 static int INIT get_next_block(struct bunzip_data *bd)
0157 {
0158     struct group_data *hufGroup = NULL;
0159     int *base = NULL;
0160     int *limit = NULL;
0161     int dbufCount, nextSym, dbufSize, groupCount, selector,
0162         i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
0163     unsigned char uc, *symToByte, *mtfSymbol, *selectors;
0164     unsigned int *dbuf, origPtr;
0165
0166     dbuf = bd->dbuf;
0167     dbufSize = bd->dbufSize;
0168     selectors = bd->selectors;
0169     byteCount = bd->byteCount;
0170     symToByte = bd->symToByte;
0171     mtfSymbol = bd->mtfSymbol;
0172
0173     /* Read in header signature and CRC, then validate signature.
0174        (last block signature means CRC is for whole file, return now) */
0175     i = get_bits(bd, 24);
0176     j = get_bits(bd, 24);
0177     bd->headerCRC = get_bits(bd, 32);
0178     if ((i == 0x177245) && (j == 0x385090))
0179         return RETVAL_LAST_BLOCK;
0180     if ((i != 0x314159) || (j != 0x265359))
0181         return RETVAL_NOT_BZIP_DATA;
0182     /* We can add support for blockRandomised if anybody complains.
0183        There was some code for this in busybox 1.0.0-pre3, but nobody ever
0184        noticed that it didn't actually work. */
0185     if (get_bits(bd, 1))
0186         return RETVAL_OBSOLETE_INPUT;
0187     origPtr = get_bits(bd, 24);
0188     if (origPtr >= dbufSize)
0189         return RETVAL_DATA_ERROR;
0190     /* mapping table: if some byte values are never used (encoding things
0191        like ascii text), the compression code removes the gaps to have fewer
0192        symbols to deal with, and writes a sparse bitfield indicating which
0193        values were present.  We make a translation table to convert the
0194        symbols back to the corresponding bytes. */
0195     t = get_bits(bd, 16);
0196     symTotal = 0;
0197     for (i = 0; i < 16; i++) {
0198         if (t&(1 << (15-i))) {
0199             k = get_bits(bd, 16);
0200             for (j = 0; j < 16; j++)
0201                 if (k&(1 << (15-j)))
0202                     symToByte[symTotal++] = (16*i)+j;
0203         }
0204     }
0205     /* How many different Huffman coding groups does this block use? */
0206     groupCount = get_bits(bd, 3);
0207     if (groupCount < 2 || groupCount > MAX_GROUPS)
0208         return RETVAL_DATA_ERROR;
0209     /* nSelectors: Every GROUP_SIZE many symbols we select a new
0210        Huffman coding group.  Read in the group selector list,
0211        which is stored as MTF encoded bit runs.  (MTF = Move To
0212        Front, as each value is used it's moved to the start of the
0213        list.) */
0214     nSelectors = get_bits(bd, 15);
0215     if (!nSelectors)
0216         return RETVAL_DATA_ERROR;
0217     for (i = 0; i < groupCount; i++)
0218         mtfSymbol[i] = i;
0219     for (i = 0; i < nSelectors; i++) {
0220         /* Get next value */
0221         for (j = 0; get_bits(bd, 1); j++)
0222             if (j >= groupCount)
0223                 return RETVAL_DATA_ERROR;
0224         /* Decode MTF to get the next selector */
0225         uc = mtfSymbol[j];
0226         for (; j; j--)
0227             mtfSymbol[j] = mtfSymbol[j-1];
0228         mtfSymbol[0] = selectors[i] = uc;
0229     }
0230     /* Read the Huffman coding tables for each group, which code
0231        for symTotal literal symbols, plus two run symbols (RUNA,
0232        RUNB) */
0233     symCount = symTotal+2;
0234     for (j = 0; j < groupCount; j++) {
0235         unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
0236         int minLen, maxLen, pp;
0237         /* Read Huffman code lengths for each symbol.  They're
0238            stored in a way similar to mtf; record a starting
0239            value for the first symbol, and an offset from the
0240            previous value for everys symbol after that.
0241            (Subtracting 1 before the loop and then adding it
0242            back at the end is an optimization that makes the
0243            test inside the loop simpler: symbol length 0
0244            becomes negative, so an unsigned inequality catches
0245            it.) */
0246         t = get_bits(bd, 5)-1;
0247         for (i = 0; i < symCount; i++) {
0248             for (;;) {
0249                 if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
0250                     return RETVAL_DATA_ERROR;
0251
0252                 /* If first bit is 0, stop.  Else
0253                    second bit indicates whether to
0254                    increment or decrement the value.
0255                    Optimization: grab 2 bits and unget
0256                    the second if the first was 0. */
0257
0258                 k = get_bits(bd, 2);
0259                 if (k < 2) {
0260                     bd->inbufBitCount++;
0261                     break;
0262                 }
0263                 /* Add one if second bit 1, else
0264                  * subtract 1.  Avoids if/else */
0265                 t += (((k+1)&2)-1);
0266             }
0267             /* Correct for the initial -1, to get the
0268              * final symbol length */
0269             length[i] = t+1;
0270         }
0271         /* Find largest and smallest lengths in this group */
0272         minLen = maxLen = length[0];
0273
0274         for (i = 1; i < symCount; i++) {
0275             if (length[i] > maxLen)
0276                 maxLen = length[i];
0277             else if (length[i] < minLen)
0278                 minLen = length[i];
0279         }
0280
0281         /* Calculate permute[], base[], and limit[] tables from
0282          * length[].
0283          *
0284          * permute[] is the lookup table for converting
0285          * Huffman coded symbols into decoded symbols.  base[]
0286          * is the amount to subtract from the value of a
0287          * Huffman symbol of a given length when using
0288          * permute[].
0289          *
0290          * limit[] indicates the largest numerical value a
0291          * symbol with a given number of bits can have.  This
0292          * is how the Huffman codes can vary in length: each
0293          * code with a value > limit[length] needs another
0294          * bit.
0295          */
0296         hufGroup = bd->groups+j;
0297         hufGroup->minLen = minLen;
0298         hufGroup->maxLen = maxLen;
0299         /* Note that minLen can't be smaller than 1, so we
0300            adjust the base and limit array pointers so we're
0301            not always wasting the first entry.  We do this
0302            again when using them (during symbol decoding).*/
0303         base = hufGroup->base-1;
0304         limit = hufGroup->limit-1;
0305         /* Calculate permute[].  Concurrently, initialize
0306          * temp[] and limit[]. */
0307         pp = 0;
0308         for (i = minLen; i <= maxLen; i++) {
0309             temp[i] = limit[i] = 0;
0310             for (t = 0; t < symCount; t++)
0311                 if (length[t] == i)
0312                     hufGroup->permute[pp++] = t;
0313         }
0314         /* Count symbols coded for at each bit length */
0315         for (i = 0; i < symCount; i++)
0316             temp[length[i]]++;
0317         /* Calculate limit[] (the largest symbol-coding value
0318          *at each bit length, which is (previous limit <<
0319          *1)+symbols at this level), and base[] (number of
0320          *symbols to ignore at each bit length, which is limit
0321          *minus the cumulative count of symbols coded for
0322          *already). */
0323         pp = t = 0;
0324         for (i = minLen; i < maxLen; i++) {
0325             pp += temp[i];
0326             /* We read the largest possible symbol size
0327                and then unget bits after determining how
0328                many we need, and those extra bits could be
0329                set to anything.  (They're noise from
0330                future symbols.)  At each level we're
0331                really only interested in the first few
0332                bits, so here we set all the trailing
0333                to-be-ignored bits to 1 so they don't
0334                affect the value > limit[length]
0335                comparison. */
0336             limit[i] = (pp << (maxLen - i)) - 1;
0337             pp <<= 1;
0338             base[i+1] = pp-(t += temp[i]);
0339         }
0340         limit[maxLen+1] = INT_MAX; /* Sentinel value for
0341                         * reading next sym. */
0342         limit[maxLen] = pp+temp[maxLen]-1;
0343         base[minLen] = 0;
0344     }
0345     /* We've finished reading and digesting the block header.  Now
0346        read this block's Huffman coded symbols from the file and
0347        undo the Huffman coding and run length encoding, saving the
0348        result into dbuf[dbufCount++] = uc */
0349
0350     /* Initialize symbol occurrence counters and symbol Move To
0351      * Front table */
0352     for (i = 0; i < 256; i++) {
0353         byteCount[i] = 0;
0354         mtfSymbol[i] = (unsigned char)i;
0355     }
0356     /* Loop through compressed symbols. */
0357     runPos = dbufCount = symCount = selector = 0;
0358     for (;;) {
0359         /* Determine which Huffman coding group to use. */
0360         if (!(symCount--)) {
0361             symCount = GROUP_SIZE-1;
0362             if (selector >= nSelectors)
0363                 return RETVAL_DATA_ERROR;
0364             hufGroup = bd->groups+selectors[selector++];
0365             base = hufGroup->base-1;
0366             limit = hufGroup->limit-1;
0367         }
0368         /* Read next Huffman-coded symbol. */
0369         /* Note: It is far cheaper to read maxLen bits and
0370            back up than it is to read minLen bits and then an
0371            additional bit at a time, testing as we go.
0372            Because there is a trailing last block (with file
0373            CRC), there is no danger of the overread causing an
0374            unexpected EOF for a valid compressed file.  As a
0375            further optimization, we do the read inline
0376            (falling back to a call to get_bits if the buffer
0377            runs dry).  The following (up to got_huff_bits:) is
0378            equivalent to j = get_bits(bd, hufGroup->maxLen);
0379          */
0380         while (bd->inbufBitCount < hufGroup->maxLen) {
0381             if (bd->inbufPos == bd->inbufCount) {
0382                 j = get_bits(bd, hufGroup->maxLen);
0383                 goto got_huff_bits;
0384             }
0385             bd->inbufBits =
0386                 (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
0387             bd->inbufBitCount += 8;
0388         }
0389         bd->inbufBitCount -= hufGroup->maxLen;
0390         j = (bd->inbufBits >> bd->inbufBitCount)&
0391             ((1 << hufGroup->maxLen)-1);
0392 got_huff_bits:
0393         /* Figure how many bits are in next symbol and
0394          * unget extras */
0395         i = hufGroup->minLen;
0396         while (j > limit[i])
0397             ++i;
0398         bd->inbufBitCount += (hufGroup->maxLen - i);
0399         /* Huffman decode value to get nextSym (with bounds checking) */
0400         if ((i > hufGroup->maxLen)
0401             || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
0402                 >= MAX_SYMBOLS))
0403             return RETVAL_DATA_ERROR;
0404         nextSym = hufGroup->permute[j];
0405         /* We have now decoded the symbol, which indicates
0406            either a new literal byte, or a repeated run of the
0407            most recent literal byte.  First, check if nextSym
0408            indicates a repeated run, and if so loop collecting
0409            how many times to repeat the last literal. */
0410         if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
0411             /* If this is the start of a new run, zero out
0412              * counter */
0413             if (!runPos) {
0414                 runPos = 1;
0415                 t = 0;
0416             }
0417             /* Neat trick that saves 1 symbol: instead of
0418                or-ing 0 or 1 at each bit position, add 1
0419                or 2 instead.  For example, 1011 is 1 << 0
0420                + 1 << 1 + 2 << 2.  1010 is 2 << 0 + 2 << 1
0421                + 1 << 2.  You can make any bit pattern
0422                that way using 1 less symbol than the basic
0423                or 0/1 method (except all bits 0, which
0424                would use no symbols, but a run of length 0
0425                doesn't mean anything in this context).
0426                Thus space is saved. */
0427             t += (runPos << nextSym);
0428             /* +runPos if RUNA; +2*runPos if RUNB */
0429
0430             runPos <<= 1;
0431             continue;
0432         }
0433         /* When we hit the first non-run symbol after a run,
0434            we now know how many times to repeat the last
0435            literal, so append that many copies to our buffer
0436            of decoded symbols (dbuf) now.  (The last literal
0437            used is the one at the head of the mtfSymbol
0438            array.) */
0439         if (runPos) {
0440             runPos = 0;
0441             if (dbufCount+t >= dbufSize)
0442                 return RETVAL_DATA_ERROR;
0443
0444             uc = symToByte[mtfSymbol[0]];
0445             byteCount[uc] += t;
0446             while (t--)
0447                 dbuf[dbufCount++] = uc;
0448         }
0449         /* Is this the terminating symbol? */
0450         if (nextSym > symTotal)
0451             break;
0452         /* At this point, nextSym indicates a new literal
0453            character.  Subtract one to get the position in the
0454            MTF array at which this literal is currently to be
0455            found.  (Note that the result can't be -1 or 0,
0456            because 0 and 1 are RUNA and RUNB.  But another
0457            instance of the first symbol in the mtf array,
0458            position 0, would have been handled as part of a
0459            run above.  Therefore 1 unused mtf position minus 2
0460            non-literal nextSym values equals -1.) */
0461         if (dbufCount >= dbufSize)
0462             return RETVAL_DATA_ERROR;
0463         i = nextSym - 1;
0464         uc = mtfSymbol[i];
0465         /* Adjust the MTF array.  Since we typically expect to
0466          *move only a small number of symbols, and are bound
0467          *by 256 in any case, using memmove here would
0468          *typically be bigger and slower due to function call
0469          *overhead and other assorted setup costs. */
0470         do {
0471             mtfSymbol[i] = mtfSymbol[i-1];
0472         } while (--i);
0473         mtfSymbol[0] = uc;
0474         uc = symToByte[uc];
0475         /* We have our literal byte.  Save it into dbuf. */
0476         byteCount[uc]++;
0477         dbuf[dbufCount++] = (unsigned int)uc;
0478     }
0479     /* At this point, we've read all the Huffman-coded symbols
0480        (and repeated runs) for this block from the input stream,
0481        and decoded them into the intermediate buffer.  There are
0482        dbufCount many decoded bytes in dbuf[].  Now undo the
0483        Burrows-Wheeler transform on dbuf.  See
0484        http://dogma.net/markn/articles/bwt/bwt.htm
0485      */
0486     /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
0487     j = 0;
0488     for (i = 0; i < 256; i++) {
0489         k = j+byteCount[i];
0490         byteCount[i] = j;
0491         j = k;
0492     }
0493     /* Figure out what order dbuf would be in if we sorted it. */
0494     for (i = 0; i < dbufCount; i++) {
0495         uc = (unsigned char)(dbuf[i] & 0xff);
0496         dbuf[byteCount[uc]] |= (i << 8);
0497         byteCount[uc]++;
0498     }
0499     /* Decode first byte by hand to initialize "previous" byte.
0500        Note that it doesn't get output, and if the first three
0501        characters are identical it doesn't qualify as a run (hence
0502        writeRunCountdown = 5). */
0503     if (dbufCount) {
0504         if (origPtr >= dbufCount)
0505             return RETVAL_DATA_ERROR;
0506         bd->writePos = dbuf[origPtr];
0507         bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
0508         bd->writePos >>= 8;
0509         bd->writeRunCountdown = 5;
0510     }
0511     bd->writeCount = dbufCount;
0512
0513     return RETVAL_OK;
0514 }
0515
0516 /* Undo burrows-wheeler transform on intermediate buffer to produce output.
0517    If start_bunzip was initialized with out_fd =-1, then up to len bytes of
0518    data are written to outbuf.  Return value is number of bytes written or
0519    error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
0520    are ignored, data is written to out_fd and return is RETVAL_OK or error.
0521 */
0522
0523 static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
0524 {
0525     const unsigned int *dbuf;
0526     int pos, xcurrent, previous, gotcount;
0527
0528     /* If last read was short due to end of file, return last block now */
0529     if (bd->writeCount < 0)
0530         return bd->writeCount;
0531
0532     gotcount = 0;
0533     dbuf = bd->dbuf;
0534     pos = bd->writePos;
0535     xcurrent = bd->writeCurrent;
0536
0537     /* We will always have pending decoded data to write into the output
0538        buffer unless this is the very first call (in which case we haven't
0539        Huffman-decoded a block into the intermediate buffer yet). */
0540
0541     if (bd->writeCopies) {
0542         /* Inside the loop, writeCopies means extra copies (beyond 1) */
0543         --bd->writeCopies;
0544         /* Loop outputting bytes */
0545         for (;;) {
0546             /* If the output buffer is full, snapshot
0547              * state and return */
0548             if (gotcount >= len) {
0549                 bd->writePos = pos;
0550                 bd->writeCurrent = xcurrent;
0551                 bd->writeCopies++;
0552                 return len;
0553             }
0554             /* Write next byte into output buffer, updating CRC */
0555             outbuf[gotcount++] = xcurrent;
0556             bd->writeCRC = (((bd->writeCRC) << 8)
0557                 ^bd->crc32Table[((bd->writeCRC) >> 24)
0558                 ^xcurrent]);
0559             /* Loop now if we're outputting multiple
0560              * copies of this byte */
0561             if (bd->writeCopies) {
0562                 --bd->writeCopies;
0563                 continue;
0564             }
0565 decode_next_byte:
0566             if (!bd->writeCount--)
0567                 break;
0568             /* Follow sequence vector to undo
0569              * Burrows-Wheeler transform */
0570             previous = xcurrent;
0571             pos = dbuf[pos];
0572             xcurrent = pos&0xff;
0573             pos >>= 8;
0574             /* After 3 consecutive copies of the same
0575                byte, the 4th is a repeat count.  We count
0576                down from 4 instead *of counting up because
0577                testing for non-zero is faster */
0578             if (--bd->writeRunCountdown) {
0579                 if (xcurrent != previous)
0580                     bd->writeRunCountdown = 4;
0581             } else {
0582                 /* We have a repeated run, this byte
0583                  * indicates the count */
0584                 bd->writeCopies = xcurrent;
0585                 xcurrent = previous;
0586                 bd->writeRunCountdown = 5;
0587                 /* Sometimes there are just 3 bytes
0588                  * (run length 0) */
0589                 if (!bd->writeCopies)
0590                     goto decode_next_byte;
0591                 /* Subtract the 1 copy we'd output
0592                  * anyway to get extras */
0593                 --bd->writeCopies;
0594             }
0595         }
0596         /* Decompression of this block completed successfully */
0597         bd->writeCRC = ~bd->writeCRC;
0598         bd->totalCRC = ((bd->totalCRC << 1) |
0599                 (bd->totalCRC >> 31)) ^ bd->writeCRC;
0600         /* If this block had a CRC error, force file level CRC error. */
0601         if (bd->writeCRC != bd->headerCRC) {
0602             bd->totalCRC = bd->headerCRC+1;
0603             return RETVAL_LAST_BLOCK;
0604         }
0605     }
0606
0607     /* Refill the intermediate buffer by Huffman-decoding next
0608      * block of input */
0609     /* (previous is just a convenient unused temp variable here) */
0610     previous = get_next_block(bd);
0611     if (previous) {
0612         bd->writeCount = previous;
0613         return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
0614     }
0615     bd->writeCRC = 0xffffffffUL;
0616     pos = bd->writePos;
0617     xcurrent = bd->writeCurrent;
0618     goto decode_next_byte;
0619 }
0620
0621 static long INIT nofill(void *buf, unsigned long len)
0622 {
0623     return -1;
0624 }
0625
0626 /* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
0627    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
0628    ignored, and data is read from file handle into temporary buffer. */
0629 static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
0630                  long (*fill)(void*, unsigned long))
0631 {
0632     struct bunzip_data *bd;
0633     unsigned int i, j, c;
0634     const unsigned int BZh0 =
0635         (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
0636         +(((unsigned int)'h') << 8)+(unsigned int)'0';
0637
0638     /* Figure out how much data to allocate */
0639     i = sizeof(struct bunzip_data);
0640
0641     /* Allocate bunzip_data.  Most fields initialize to zero. */
0642     bd = *bdp = malloc(i);
0643     if (!bd)
0644         return RETVAL_OUT_OF_MEMORY;
0645     memset(bd, 0, sizeof(struct bunzip_data));
0646     /* Setup input buffer */
0647     bd->inbuf = inbuf;
0648     bd->inbufCount = len;
0649     if (fill != NULL)
0650         bd->fill = fill;
0651     else
0652         bd->fill = nofill;
0653
0654     /* Init the CRC32 table (big endian) */
0655     for (i = 0; i < 256; i++) {
0656         c = i << 24;
0657         for (j = 8; j; j--)
0658             c = c&0x80000000 ? (c << 1)^(CRC32_POLY_BE) : (c << 1);
0659         bd->crc32Table[i] = c;
0660     }
0661
0662     /* Ensure that file starts with "BZh['1'-'9']." */
0663     i = get_bits(bd, 32);
0664     if (((unsigned int)(i-BZh0-1)) >= 9)
0665         return RETVAL_NOT_BZIP_DATA;
0666
0667     /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
0668        uncompressed data.  Allocate intermediate buffer for block. */
0669     bd->dbufSize = 100000*(i-BZh0);
0670
0671     bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
0672     if (!bd->dbuf)
0673         return RETVAL_OUT_OF_MEMORY;
0674     return RETVAL_OK;
0675 }
0676
0677 /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
0678    not end of file.) */
0679 STATIC int INIT bunzip2(unsigned char *buf, long len,
0680             long (*fill)(void*, unsigned long),
0681             long (*flush)(void*, unsigned long),
0682             unsigned char *outbuf,
0683             long *pos,
0684             void(*error)(char *x))
0685 {
0686     struct bunzip_data *bd;
0687     int i = -1;
0688     unsigned char *inbuf;
0689
0690     if (flush)
0691         outbuf = malloc(BZIP2_IOBUF_SIZE);
0692
0693     if (!outbuf) {
0694         error("Could not allocate output buffer");
0695         return RETVAL_OUT_OF_MEMORY;
0696     }
0697     if (buf)
0698         inbuf = buf;
0699     else
0700         inbuf = malloc(BZIP2_IOBUF_SIZE);
0701     if (!inbuf) {
0702         error("Could not allocate input buffer");
0703         i = RETVAL_OUT_OF_MEMORY;
0704         goto exit_0;
0705     }
0706     i = start_bunzip(&bd, inbuf, len, fill);
0707     if (!i) {
0708         for (;;) {
0709             i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
0710             if (i <= 0)
0711                 break;
0712             if (!flush)
0713                 outbuf += i;
0714             else
0715                 if (i != flush(outbuf, i)) {
0716                     i = RETVAL_UNEXPECTED_OUTPUT_EOF;
0717                     break;
0718                 }
0719         }
0720     }
0721     /* Check CRC and release memory */
0722     if (i == RETVAL_LAST_BLOCK) {
0723         if (bd->headerCRC != bd->totalCRC)
0724             error("Data integrity error when decompressing.");
0725         else
0726             i = RETVAL_OK;
0727     } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
0728         error("Compressed file ends unexpectedly");
0729     }
0730     if (!bd)
0731         goto exit_1;
0732     if (bd->dbuf)
0733         large_free(bd->dbuf);
0734     if (pos)
0735         *pos = bd->inbufPos;
0736     free(bd);
0737 exit_1:
0738     if (!buf)
0739         free(inbuf);
0740 exit_0:
0741     if (flush)
0742         free(outbuf);
0743     return i;
0744 }
0745
0746 #ifdef PREBOOT
0747 STATIC int INIT __decompress(unsigned char *buf, long len,
0748             long (*fill)(void*, unsigned long),
0749             long (*flush)(void*, unsigned long),
0750             unsigned char *outbuf, long olen,
0751             long *pos,
0752             void (*error)(char *x))
0753 {
0754     return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
0755 }
0756 #endif