Back to home page

LXR

 
 

    


0001 /*  Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
0002 
0003     Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
0004     which also acknowledges contributions by Mike Burrows, David Wheeler,
0005     Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
0006     Robert Sedgewick, and Jon L. Bentley.
0007 
0008     This code is licensed under the LGPLv2:
0009         LGPL (http://www.gnu.org/copyleft/lgpl.html
0010 */
0011 
0012 /*
0013     Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
0014 
0015     More efficient reading of Huffman codes, a streamlined read_bunzip()
0016     function, and various other tweaks.  In (limited) tests, approximately
0017     20% faster than bzcat on x86 and about 10% faster on arm.
0018 
0019     Note that about 2/3 of the time is spent in read_unzip() reversing
0020     the Burrows-Wheeler transformation.  Much of that time is delay
0021     resulting from cache misses.
0022 
0023     I would ask that anyone benefiting from this work, especially those
0024     using it in commercial products, consider making a donation to my local
0025     non-profit hospice organization in the name of the woman I loved, who
0026     passed away Feb. 12, 2003.
0027 
0028         In memory of Toni W. Hagan
0029 
0030         Hospice of Acadiana, Inc.
0031         2600 Johnston St., Suite 200
0032         Lafayette, LA 70503-3240
0033 
0034         Phone (337) 232-1234 or 1-800-738-2226
0035         Fax   (337) 232-1297
0036 
0037         http://www.hospiceacadiana.com/
0038 
0039     Manuel
0040  */
0041 
0042 /*
0043     Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
0044 */
0045 
0046 
0047 #ifdef STATIC
0048 #define PREBOOT
0049 #else
0050 #include <linux/decompress/bunzip2.h>
0051 #endif /* STATIC */
0052 
0053 #include <linux/decompress/mm.h>
0054 
0055 #ifndef INT_MAX
0056 #define INT_MAX 0x7fffffff
0057 #endif
0058 
0059 /* Constants for Huffman coding */
0060 #define MAX_GROUPS      6
0061 #define GROUP_SIZE          50  /* 64 would have been more efficient */
0062 #define MAX_HUFCODE_BITS    20  /* Longest Huffman code allowed */
0063 #define MAX_SYMBOLS         258 /* 256 literals + RUNA + RUNB */
0064 #define SYMBOL_RUNA     0
0065 #define SYMBOL_RUNB     1
0066 
0067 /* Status return values */
0068 #define RETVAL_OK           0
0069 #define RETVAL_LAST_BLOCK       (-1)
0070 #define RETVAL_NOT_BZIP_DATA        (-2)
0071 #define RETVAL_UNEXPECTED_INPUT_EOF (-3)
0072 #define RETVAL_UNEXPECTED_OUTPUT_EOF    (-4)
0073 #define RETVAL_DATA_ERROR       (-5)
0074 #define RETVAL_OUT_OF_MEMORY        (-6)
0075 #define RETVAL_OBSOLETE_INPUT       (-7)
0076 
0077 /* Other housekeeping constants */
0078 #define BZIP2_IOBUF_SIZE        4096
0079 
0080 /* This is what we know about each Huffman coding group */
0081 struct group_data {
0082     /* We have an extra slot at the end of limit[] for a sentinal value. */
0083     int limit[MAX_HUFCODE_BITS+1];
0084     int base[MAX_HUFCODE_BITS];
0085     int permute[MAX_SYMBOLS];
0086     int minLen, maxLen;
0087 };
0088 
0089 /* Structure holding all the housekeeping data, including IO buffers and
0090    memory that persists between calls to bunzip */
0091 struct bunzip_data {
0092     /* State for interrupting output loop */
0093     int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
0094     /* I/O tracking data (file handles, buffers, positions, etc.) */
0095     long (*fill)(void*, unsigned long);
0096     long inbufCount, inbufPos /*, outbufPos*/;
0097     unsigned char *inbuf /*,*outbuf*/;
0098     unsigned int inbufBitCount, inbufBits;
0099     /* The CRC values stored in the block header and calculated from the
0100     data */
0101     unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
0102     /* Intermediate buffer and its size (in bytes) */
0103     unsigned int *dbuf, dbufSize;
0104     /* These things are a bit too big to go on the stack */
0105     unsigned char selectors[32768];     /* nSelectors = 15 bits */
0106     struct group_data groups[MAX_GROUPS];   /* Huffman coding tables */
0107     int io_error;           /* non-zero if we have IO error */
0108     int byteCount[256];
0109     unsigned char symToByte[256], mtfSymbol[256];
0110 };
0111 
0112 
0113 /* Return the next nnn bits of input.  All reads from the compressed input
0114    are done through this function.  All reads are big endian */
0115 static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
0116 {
0117     unsigned int bits = 0;
0118 
0119     /* If we need to get more data from the byte buffer, do so.
0120        (Loop getting one byte at a time to enforce endianness and avoid
0121        unaligned access.) */
0122     while (bd->inbufBitCount < bits_wanted) {
0123         /* If we need to read more data from file into byte buffer, do
0124            so */
0125         if (bd->inbufPos == bd->inbufCount) {
0126             if (bd->io_error)
0127                 return 0;
0128             bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
0129             if (bd->inbufCount <= 0) {
0130                 bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
0131                 return 0;
0132             }
0133             bd->inbufPos = 0;
0134         }
0135         /* Avoid 32-bit overflow (dump bit buffer to top of output) */
0136         if (bd->inbufBitCount >= 24) {
0137             bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
0138             bits_wanted -= bd->inbufBitCount;
0139             bits <<= bits_wanted;
0140             bd->inbufBitCount = 0;
0141         }
0142         /* Grab next 8 bits of input from buffer. */
0143         bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
0144         bd->inbufBitCount += 8;
0145     }
0146     /* Calculate result */
0147     bd->inbufBitCount -= bits_wanted;
0148     bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
0149 
0150     return bits;
0151 }
0152 
0153 /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
0154 
0155 static int INIT get_next_block(struct bunzip_data *bd)
0156 {
0157     struct group_data *hufGroup = NULL;
0158     int *base = NULL;
0159     int *limit = NULL;
0160     int dbufCount, nextSym, dbufSize, groupCount, selector,
0161         i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
0162     unsigned char uc, *symToByte, *mtfSymbol, *selectors;
0163     unsigned int *dbuf, origPtr;
0164 
0165     dbuf = bd->dbuf;
0166     dbufSize = bd->dbufSize;
0167     selectors = bd->selectors;
0168     byteCount = bd->byteCount;
0169     symToByte = bd->symToByte;
0170     mtfSymbol = bd->mtfSymbol;
0171 
0172     /* Read in header signature and CRC, then validate signature.
0173        (last block signature means CRC is for whole file, return now) */
0174     i = get_bits(bd, 24);
0175     j = get_bits(bd, 24);
0176     bd->headerCRC = get_bits(bd, 32);
0177     if ((i == 0x177245) && (j == 0x385090))
0178         return RETVAL_LAST_BLOCK;
0179     if ((i != 0x314159) || (j != 0x265359))
0180         return RETVAL_NOT_BZIP_DATA;
0181     /* We can add support for blockRandomised if anybody complains.
0182        There was some code for this in busybox 1.0.0-pre3, but nobody ever
0183        noticed that it didn't actually work. */
0184     if (get_bits(bd, 1))
0185         return RETVAL_OBSOLETE_INPUT;
0186     origPtr = get_bits(bd, 24);
0187     if (origPtr >= dbufSize)
0188         return RETVAL_DATA_ERROR;
0189     /* mapping table: if some byte values are never used (encoding things
0190        like ascii text), the compression code removes the gaps to have fewer
0191        symbols to deal with, and writes a sparse bitfield indicating which
0192        values were present.  We make a translation table to convert the
0193        symbols back to the corresponding bytes. */
0194     t = get_bits(bd, 16);
0195     symTotal = 0;
0196     for (i = 0; i < 16; i++) {
0197         if (t&(1 << (15-i))) {
0198             k = get_bits(bd, 16);
0199             for (j = 0; j < 16; j++)
0200                 if (k&(1 << (15-j)))
0201                     symToByte[symTotal++] = (16*i)+j;
0202         }
0203     }
0204     /* How many different Huffman coding groups does this block use? */
0205     groupCount = get_bits(bd, 3);
0206     if (groupCount < 2 || groupCount > MAX_GROUPS)
0207         return RETVAL_DATA_ERROR;
0208     /* nSelectors: Every GROUP_SIZE many symbols we select a new
0209        Huffman coding group.  Read in the group selector list,
0210        which is stored as MTF encoded bit runs.  (MTF = Move To
0211        Front, as each value is used it's moved to the start of the
0212        list.) */
0213     nSelectors = get_bits(bd, 15);
0214     if (!nSelectors)
0215         return RETVAL_DATA_ERROR;
0216     for (i = 0; i < groupCount; i++)
0217         mtfSymbol[i] = i;
0218     for (i = 0; i < nSelectors; i++) {
0219         /* Get next value */
0220         for (j = 0; get_bits(bd, 1); j++)
0221             if (j >= groupCount)
0222                 return RETVAL_DATA_ERROR;
0223         /* Decode MTF to get the next selector */
0224         uc = mtfSymbol[j];
0225         for (; j; j--)
0226             mtfSymbol[j] = mtfSymbol[j-1];
0227         mtfSymbol[0] = selectors[i] = uc;
0228     }
0229     /* Read the Huffman coding tables for each group, which code
0230        for symTotal literal symbols, plus two run symbols (RUNA,
0231        RUNB) */
0232     symCount = symTotal+2;
0233     for (j = 0; j < groupCount; j++) {
0234         unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
0235         int minLen, maxLen, pp;
0236         /* Read Huffman code lengths for each symbol.  They're
0237            stored in a way similar to mtf; record a starting
0238            value for the first symbol, and an offset from the
0239            previous value for everys symbol after that.
0240            (Subtracting 1 before the loop and then adding it
0241            back at the end is an optimization that makes the
0242            test inside the loop simpler: symbol length 0
0243            becomes negative, so an unsigned inequality catches
0244            it.) */
0245         t = get_bits(bd, 5)-1;
0246         for (i = 0; i < symCount; i++) {
0247             for (;;) {
0248                 if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
0249                     return RETVAL_DATA_ERROR;
0250 
0251                 /* If first bit is 0, stop.  Else
0252                    second bit indicates whether to
0253                    increment or decrement the value.
0254                    Optimization: grab 2 bits and unget
0255                    the second if the first was 0. */
0256 
0257                 k = get_bits(bd, 2);
0258                 if (k < 2) {
0259                     bd->inbufBitCount++;
0260                     break;
0261                 }
0262                 /* Add one if second bit 1, else
0263                  * subtract 1.  Avoids if/else */
0264                 t += (((k+1)&2)-1);
0265             }
0266             /* Correct for the initial -1, to get the
0267              * final symbol length */
0268             length[i] = t+1;
0269         }
0270         /* Find largest and smallest lengths in this group */
0271         minLen = maxLen = length[0];
0272 
0273         for (i = 1; i < symCount; i++) {
0274             if (length[i] > maxLen)
0275                 maxLen = length[i];
0276             else if (length[i] < minLen)
0277                 minLen = length[i];
0278         }
0279 
0280         /* Calculate permute[], base[], and limit[] tables from
0281          * length[].
0282          *
0283          * permute[] is the lookup table for converting
0284          * Huffman coded symbols into decoded symbols.  base[]
0285          * is the amount to subtract from the value of a
0286          * Huffman symbol of a given length when using
0287          * permute[].
0288          *
0289          * limit[] indicates the largest numerical value a
0290          * symbol with a given number of bits can have.  This
0291          * is how the Huffman codes can vary in length: each
0292          * code with a value > limit[length] needs another
0293          * bit.
0294          */
0295         hufGroup = bd->groups+j;
0296         hufGroup->minLen = minLen;
0297         hufGroup->maxLen = maxLen;
0298         /* Note that minLen can't be smaller than 1, so we
0299            adjust the base and limit array pointers so we're
0300            not always wasting the first entry.  We do this
0301            again when using them (during symbol decoding).*/
0302         base = hufGroup->base-1;
0303         limit = hufGroup->limit-1;
0304         /* Calculate permute[].  Concurrently, initialize
0305          * temp[] and limit[]. */
0306         pp = 0;
0307         for (i = minLen; i <= maxLen; i++) {
0308             temp[i] = limit[i] = 0;
0309             for (t = 0; t < symCount; t++)
0310                 if (length[t] == i)
0311                     hufGroup->permute[pp++] = t;
0312         }
0313         /* Count symbols coded for at each bit length */
0314         for (i = 0; i < symCount; i++)
0315             temp[length[i]]++;
0316         /* Calculate limit[] (the largest symbol-coding value
0317          *at each bit length, which is (previous limit <<
0318          *1)+symbols at this level), and base[] (number of
0319          *symbols to ignore at each bit length, which is limit
0320          *minus the cumulative count of symbols coded for
0321          *already). */
0322         pp = t = 0;
0323         for (i = minLen; i < maxLen; i++) {
0324             pp += temp[i];
0325             /* We read the largest possible symbol size
0326                and then unget bits after determining how
0327                many we need, and those extra bits could be
0328                set to anything.  (They're noise from
0329                future symbols.)  At each level we're
0330                really only interested in the first few
0331                bits, so here we set all the trailing
0332                to-be-ignored bits to 1 so they don't
0333                affect the value > limit[length]
0334                comparison. */
0335             limit[i] = (pp << (maxLen - i)) - 1;
0336             pp <<= 1;
0337             base[i+1] = pp-(t += temp[i]);
0338         }
0339         limit[maxLen+1] = INT_MAX; /* Sentinal value for
0340                         * reading next sym. */
0341         limit[maxLen] = pp+temp[maxLen]-1;
0342         base[minLen] = 0;
0343     }
0344     /* We've finished reading and digesting the block header.  Now
0345        read this block's Huffman coded symbols from the file and
0346        undo the Huffman coding and run length encoding, saving the
0347        result into dbuf[dbufCount++] = uc */
0348 
0349     /* Initialize symbol occurrence counters and symbol Move To
0350      * Front table */
0351     for (i = 0; i < 256; i++) {
0352         byteCount[i] = 0;
0353         mtfSymbol[i] = (unsigned char)i;
0354     }
0355     /* Loop through compressed symbols. */
0356     runPos = dbufCount = symCount = selector = 0;
0357     for (;;) {
0358         /* Determine which Huffman coding group to use. */
0359         if (!(symCount--)) {
0360             symCount = GROUP_SIZE-1;
0361             if (selector >= nSelectors)
0362                 return RETVAL_DATA_ERROR;
0363             hufGroup = bd->groups+selectors[selector++];
0364             base = hufGroup->base-1;
0365             limit = hufGroup->limit-1;
0366         }
0367         /* Read next Huffman-coded symbol. */
0368         /* Note: It is far cheaper to read maxLen bits and
0369            back up than it is to read minLen bits and then an
0370            additional bit at a time, testing as we go.
0371            Because there is a trailing last block (with file
0372            CRC), there is no danger of the overread causing an
0373            unexpected EOF for a valid compressed file.  As a
0374            further optimization, we do the read inline
0375            (falling back to a call to get_bits if the buffer
0376            runs dry).  The following (up to got_huff_bits:) is
0377            equivalent to j = get_bits(bd, hufGroup->maxLen);
0378          */
0379         while (bd->inbufBitCount < hufGroup->maxLen) {
0380             if (bd->inbufPos == bd->inbufCount) {
0381                 j = get_bits(bd, hufGroup->maxLen);
0382                 goto got_huff_bits;
0383             }
0384             bd->inbufBits =
0385                 (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
0386             bd->inbufBitCount += 8;
0387         };
0388         bd->inbufBitCount -= hufGroup->maxLen;
0389         j = (bd->inbufBits >> bd->inbufBitCount)&
0390             ((1 << hufGroup->maxLen)-1);
0391 got_huff_bits:
0392         /* Figure how how many bits are in next symbol and
0393          * unget extras */
0394         i = hufGroup->minLen;
0395         while (j > limit[i])
0396             ++i;
0397         bd->inbufBitCount += (hufGroup->maxLen - i);
0398         /* Huffman decode value to get nextSym (with bounds checking) */
0399         if ((i > hufGroup->maxLen)
0400             || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
0401                 >= MAX_SYMBOLS))
0402             return RETVAL_DATA_ERROR;
0403         nextSym = hufGroup->permute[j];
0404         /* We have now decoded the symbol, which indicates
0405            either a new literal byte, or a repeated run of the
0406            most recent literal byte.  First, check if nextSym
0407            indicates a repeated run, and if so loop collecting
0408            how many times to repeat the last literal. */
0409         if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
0410             /* If this is the start of a new run, zero out
0411              * counter */
0412             if (!runPos) {
0413                 runPos = 1;
0414                 t = 0;
0415             }
0416             /* Neat trick that saves 1 symbol: instead of
0417                or-ing 0 or 1 at each bit position, add 1
0418                or 2 instead.  For example, 1011 is 1 << 0
0419                + 1 << 1 + 2 << 2.  1010 is 2 << 0 + 2 << 1
0420                + 1 << 2.  You can make any bit pattern
0421                that way using 1 less symbol than the basic
0422                or 0/1 method (except all bits 0, which
0423                would use no symbols, but a run of length 0
0424                doesn't mean anything in this context).
0425                Thus space is saved. */
0426             t += (runPos << nextSym);
0427             /* +runPos if RUNA; +2*runPos if RUNB */
0428 
0429             runPos <<= 1;
0430             continue;
0431         }
0432         /* When we hit the first non-run symbol after a run,
0433            we now know how many times to repeat the last
0434            literal, so append that many copies to our buffer
0435            of decoded symbols (dbuf) now.  (The last literal
0436            used is the one at the head of the mtfSymbol
0437            array.) */
0438         if (runPos) {
0439             runPos = 0;
0440             if (dbufCount+t >= dbufSize)
0441                 return RETVAL_DATA_ERROR;
0442 
0443             uc = symToByte[mtfSymbol[0]];
0444             byteCount[uc] += t;
0445             while (t--)
0446                 dbuf[dbufCount++] = uc;
0447         }
0448         /* Is this the terminating symbol? */
0449         if (nextSym > symTotal)
0450             break;
0451         /* At this point, nextSym indicates a new literal
0452            character.  Subtract one to get the position in the
0453            MTF array at which this literal is currently to be
0454            found.  (Note that the result can't be -1 or 0,
0455            because 0 and 1 are RUNA and RUNB.  But another
0456            instance of the first symbol in the mtf array,
0457            position 0, would have been handled as part of a
0458            run above.  Therefore 1 unused mtf position minus 2
0459            non-literal nextSym values equals -1.) */
0460         if (dbufCount >= dbufSize)
0461             return RETVAL_DATA_ERROR;
0462         i = nextSym - 1;
0463         uc = mtfSymbol[i];
0464         /* Adjust the MTF array.  Since we typically expect to
0465          *move only a small number of symbols, and are bound
0466          *by 256 in any case, using memmove here would
0467          *typically be bigger and slower due to function call
0468          *overhead and other assorted setup costs. */
0469         do {
0470             mtfSymbol[i] = mtfSymbol[i-1];
0471         } while (--i);
0472         mtfSymbol[0] = uc;
0473         uc = symToByte[uc];
0474         /* We have our literal byte.  Save it into dbuf. */
0475         byteCount[uc]++;
0476         dbuf[dbufCount++] = (unsigned int)uc;
0477     }
0478     /* At this point, we've read all the Huffman-coded symbols
0479        (and repeated runs) for this block from the input stream,
0480        and decoded them into the intermediate buffer.  There are
0481        dbufCount many decoded bytes in dbuf[].  Now undo the
0482        Burrows-Wheeler transform on dbuf.  See
0483        http://dogma.net/markn/articles/bwt/bwt.htm
0484      */
0485     /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
0486     j = 0;
0487     for (i = 0; i < 256; i++) {
0488         k = j+byteCount[i];
0489         byteCount[i] = j;
0490         j = k;
0491     }
0492     /* Figure out what order dbuf would be in if we sorted it. */
0493     for (i = 0; i < dbufCount; i++) {
0494         uc = (unsigned char)(dbuf[i] & 0xff);
0495         dbuf[byteCount[uc]] |= (i << 8);
0496         byteCount[uc]++;
0497     }
0498     /* Decode first byte by hand to initialize "previous" byte.
0499        Note that it doesn't get output, and if the first three
0500        characters are identical it doesn't qualify as a run (hence
0501        writeRunCountdown = 5). */
0502     if (dbufCount) {
0503         if (origPtr >= dbufCount)
0504             return RETVAL_DATA_ERROR;
0505         bd->writePos = dbuf[origPtr];
0506         bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
0507         bd->writePos >>= 8;
0508         bd->writeRunCountdown = 5;
0509     }
0510     bd->writeCount = dbufCount;
0511 
0512     return RETVAL_OK;
0513 }
0514 
0515 /* Undo burrows-wheeler transform on intermediate buffer to produce output.
0516    If start_bunzip was initialized with out_fd =-1, then up to len bytes of
0517    data are written to outbuf.  Return value is number of bytes written or
0518    error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
0519    are ignored, data is written to out_fd and return is RETVAL_OK or error.
0520 */
0521 
0522 static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
0523 {
0524     const unsigned int *dbuf;
0525     int pos, xcurrent, previous, gotcount;
0526 
0527     /* If last read was short due to end of file, return last block now */
0528     if (bd->writeCount < 0)
0529         return bd->writeCount;
0530 
0531     gotcount = 0;
0532     dbuf = bd->dbuf;
0533     pos = bd->writePos;
0534     xcurrent = bd->writeCurrent;
0535 
0536     /* We will always have pending decoded data to write into the output
0537        buffer unless this is the very first call (in which case we haven't
0538        Huffman-decoded a block into the intermediate buffer yet). */
0539 
0540     if (bd->writeCopies) {
0541         /* Inside the loop, writeCopies means extra copies (beyond 1) */
0542         --bd->writeCopies;
0543         /* Loop outputting bytes */
0544         for (;;) {
0545             /* If the output buffer is full, snapshot
0546              * state and return */
0547             if (gotcount >= len) {
0548                 bd->writePos = pos;
0549                 bd->writeCurrent = xcurrent;
0550                 bd->writeCopies++;
0551                 return len;
0552             }
0553             /* Write next byte into output buffer, updating CRC */
0554             outbuf[gotcount++] = xcurrent;
0555             bd->writeCRC = (((bd->writeCRC) << 8)
0556                 ^bd->crc32Table[((bd->writeCRC) >> 24)
0557                 ^xcurrent]);
0558             /* Loop now if we're outputting multiple
0559              * copies of this byte */
0560             if (bd->writeCopies) {
0561                 --bd->writeCopies;
0562                 continue;
0563             }
0564 decode_next_byte:
0565             if (!bd->writeCount--)
0566                 break;
0567             /* Follow sequence vector to undo
0568              * Burrows-Wheeler transform */
0569             previous = xcurrent;
0570             pos = dbuf[pos];
0571             xcurrent = pos&0xff;
0572             pos >>= 8;
0573             /* After 3 consecutive copies of the same
0574                byte, the 4th is a repeat count.  We count
0575                down from 4 instead *of counting up because
0576                testing for non-zero is faster */
0577             if (--bd->writeRunCountdown) {
0578                 if (xcurrent != previous)
0579                     bd->writeRunCountdown = 4;
0580             } else {
0581                 /* We have a repeated run, this byte
0582                  * indicates the count */
0583                 bd->writeCopies = xcurrent;
0584                 xcurrent = previous;
0585                 bd->writeRunCountdown = 5;
0586                 /* Sometimes there are just 3 bytes
0587                  * (run length 0) */
0588                 if (!bd->writeCopies)
0589                     goto decode_next_byte;
0590                 /* Subtract the 1 copy we'd output
0591                  * anyway to get extras */
0592                 --bd->writeCopies;
0593             }
0594         }
0595         /* Decompression of this block completed successfully */
0596         bd->writeCRC = ~bd->writeCRC;
0597         bd->totalCRC = ((bd->totalCRC << 1) |
0598                 (bd->totalCRC >> 31)) ^ bd->writeCRC;
0599         /* If this block had a CRC error, force file level CRC error. */
0600         if (bd->writeCRC != bd->headerCRC) {
0601             bd->totalCRC = bd->headerCRC+1;
0602             return RETVAL_LAST_BLOCK;
0603         }
0604     }
0605 
0606     /* Refill the intermediate buffer by Huffman-decoding next
0607      * block of input */
0608     /* (previous is just a convenient unused temp variable here) */
0609     previous = get_next_block(bd);
0610     if (previous) {
0611         bd->writeCount = previous;
0612         return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
0613     }
0614     bd->writeCRC = 0xffffffffUL;
0615     pos = bd->writePos;
0616     xcurrent = bd->writeCurrent;
0617     goto decode_next_byte;
0618 }
0619 
0620 static long INIT nofill(void *buf, unsigned long len)
0621 {
0622     return -1;
0623 }
0624 
0625 /* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
0626    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
0627    ignored, and data is read from file handle into temporary buffer. */
0628 static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
0629                  long (*fill)(void*, unsigned long))
0630 {
0631     struct bunzip_data *bd;
0632     unsigned int i, j, c;
0633     const unsigned int BZh0 =
0634         (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
0635         +(((unsigned int)'h') << 8)+(unsigned int)'0';
0636 
0637     /* Figure out how much data to allocate */
0638     i = sizeof(struct bunzip_data);
0639 
0640     /* Allocate bunzip_data.  Most fields initialize to zero. */
0641     bd = *bdp = malloc(i);
0642     if (!bd)
0643         return RETVAL_OUT_OF_MEMORY;
0644     memset(bd, 0, sizeof(struct bunzip_data));
0645     /* Setup input buffer */
0646     bd->inbuf = inbuf;
0647     bd->inbufCount = len;
0648     if (fill != NULL)
0649         bd->fill = fill;
0650     else
0651         bd->fill = nofill;
0652 
0653     /* Init the CRC32 table (big endian) */
0654     for (i = 0; i < 256; i++) {
0655         c = i << 24;
0656         for (j = 8; j; j--)
0657             c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
0658         bd->crc32Table[i] = c;
0659     }
0660 
0661     /* Ensure that file starts with "BZh['1'-'9']." */
0662     i = get_bits(bd, 32);
0663     if (((unsigned int)(i-BZh0-1)) >= 9)
0664         return RETVAL_NOT_BZIP_DATA;
0665 
0666     /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
0667        uncompressed data.  Allocate intermediate buffer for block. */
0668     bd->dbufSize = 100000*(i-BZh0);
0669 
0670     bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
0671     if (!bd->dbuf)
0672         return RETVAL_OUT_OF_MEMORY;
0673     return RETVAL_OK;
0674 }
0675 
0676 /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
0677    not end of file.) */
0678 STATIC int INIT bunzip2(unsigned char *buf, long len,
0679             long (*fill)(void*, unsigned long),
0680             long (*flush)(void*, unsigned long),
0681             unsigned char *outbuf,
0682             long *pos,
0683             void(*error)(char *x))
0684 {
0685     struct bunzip_data *bd;
0686     int i = -1;
0687     unsigned char *inbuf;
0688 
0689     if (flush)
0690         outbuf = malloc(BZIP2_IOBUF_SIZE);
0691 
0692     if (!outbuf) {
0693         error("Could not allocate output buffer");
0694         return RETVAL_OUT_OF_MEMORY;
0695     }
0696     if (buf)
0697         inbuf = buf;
0698     else
0699         inbuf = malloc(BZIP2_IOBUF_SIZE);
0700     if (!inbuf) {
0701         error("Could not allocate input buffer");
0702         i = RETVAL_OUT_OF_MEMORY;
0703         goto exit_0;
0704     }
0705     i = start_bunzip(&bd, inbuf, len, fill);
0706     if (!i) {
0707         for (;;) {
0708             i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
0709             if (i <= 0)
0710                 break;
0711             if (!flush)
0712                 outbuf += i;
0713             else
0714                 if (i != flush(outbuf, i)) {
0715                     i = RETVAL_UNEXPECTED_OUTPUT_EOF;
0716                     break;
0717                 }
0718         }
0719     }
0720     /* Check CRC and release memory */
0721     if (i == RETVAL_LAST_BLOCK) {
0722         if (bd->headerCRC != bd->totalCRC)
0723             error("Data integrity error when decompressing.");
0724         else
0725             i = RETVAL_OK;
0726     } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
0727         error("Compressed file ends unexpectedly");
0728     }
0729     if (!bd)
0730         goto exit_1;
0731     if (bd->dbuf)
0732         large_free(bd->dbuf);
0733     if (pos)
0734         *pos = bd->inbufPos;
0735     free(bd);
0736 exit_1:
0737     if (!buf)
0738         free(inbuf);
0739 exit_0:
0740     if (flush)
0741         free(outbuf);
0742     return i;
0743 }
0744 
0745 #ifdef PREBOOT
0746 STATIC int INIT __decompress(unsigned char *buf, long len,
0747             long (*fill)(void*, unsigned long),
0748             long (*flush)(void*, unsigned long),
0749             unsigned char *outbuf, long olen,
0750             long *pos,
0751             void (*error)(char *x))
0752 {
0753     return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
0754 }
0755 #endif