/*
 * LZMA2 decoder
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <https://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"
#include "xz_lzma2.h"

/*
 * Range decoder initialization eats the first five bytes of each LZMA chunk.
 */
#define RC_INIT_BYTES 5

/*
 * Minimum number of usable input bytes needed to safely decode one LZMA
 * symbol. The worst case is that we decode 22 bits using probabilities
 * and 26 direct bits, which may consume at most 20 bytes of input.
 * However, lzma_main() does an extra normalization before returning,
 * thus we need 21 here.
 */
#define LZMA_IN_REQUIRED 21
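
/*
 * A rough sketch of where the 20-byte figure comes from: with the usual
 * LZMA constants (RC_MOVE_BITS == 5, RC_BIT_MODEL_TOTAL == 2048), the
 * adaptive probabilities stay within roughly [31, 2017], so one
 * probability-coded bit can consume at most about -log2(31 / 2048) = 6
 * bits of the range, while a direct bit consumes exactly one. The range
 * decoder reads one input byte per 8 bits of range consumed, giving
 * about (22 * 6 + 26) / 8 = 20 bytes in the worst case.
 */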

/*
 * Dictionary (history buffer)
 *
 * These are always true:
 *    start <= pos <= full <= end
 *    pos <= limit <= end
 *
 * In multi-call mode, also these are true:
 *    end == size
 *    size <= size_max
 *    allocated <= size
 *
 * Most of these variables are size_t to support single-call mode,
 * in which the dictionary variables address the actual output
 * buffer directly.
 */
struct dictionary {
    /* Beginning of the history buffer */
    uint8_t *buf;

    /* Old position in buf (before decoding more data) */
    size_t start;

    /* Position in buf */
    size_t pos;

    /*
     * How full the dictionary is. This is used to detect corrupt input
     * that would read beyond the beginning of the uncompressed stream.
     */
    size_t full;

    /* Write limit; we don't write to buf[limit] or later bytes. */
    size_t limit;

    /*
     * End of the dictionary buffer. In multi-call mode, this is
     * the same as the dictionary size. In single-call mode, this
     * indicates the size of the output buffer.
     */
    size_t end;

    /*
     * Size of the dictionary as specified in Block Header. This is used
     * together with "full" to detect corrupt input that would make us
     * read beyond the beginning of the uncompressed stream.
     */
    uint32_t size;

    /*
     * Maximum allowed dictionary size in multi-call mode.
     * This is ignored in single-call mode.
     */
    uint32_t size_max;

    /*
     * Amount of memory currently allocated for the dictionary.
     * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
     * size_max is always the same as the allocated size.)
     */
    uint32_t allocated;

    /* Operation mode */
    enum xz_mode mode;
};

/* Range decoder */
struct rc_dec {
    uint32_t range;
    uint32_t code;

    /*
     * Number of initializing bytes remaining to be read
     * by rc_read_init().
     */
    uint32_t init_bytes_left;

    /*
     * Buffer from which we read our input. It can be either
     * temp.buf or the caller-provided input buffer.
     */
    const uint8_t *in;
    size_t in_pos;
    size_t in_limit;
};

/* Probabilities for a length decoder. */
struct lzma_len_dec {
    /* Probability of match length being at least 10 */
    uint16_t choice;

    /* Probability of match length being at least 18 */
    uint16_t choice2;

    /* Probabilities for match lengths 2-9 */
    uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

    /* Probabilities for match lengths 10-17 */
    uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

    /* Probabilities for match lengths 18-273 */
    uint16_t high[LEN_HIGH_SYMBOLS];
};

struct lzma_dec {
    /* Distances of latest four matches */
    uint32_t rep0;
    uint32_t rep1;
    uint32_t rep2;
    uint32_t rep3;

    /* Types of the most recently seen LZMA symbols */
    enum lzma_state state;

    /*
     * Length of a match. This is updated so that dict_repeat can
     * be called again to finish repeating the whole match.
     */
    uint32_t len;

    /*
     * LZMA properties or related bit masks (number of literal
     * context bits, a mask derived from the number of literal
     * position bits, and a mask derived from the number of
     * position bits)
     */
    uint32_t lc;
    uint32_t literal_pos_mask; /* (1 << lp) - 1 */
    uint32_t pos_mask;         /* (1 << pb) - 1 */

    /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
    uint16_t is_match[STATES][POS_STATES_MAX];

    /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
    uint16_t is_rep[STATES];

    /*
     * If 0, distance of a repeated match is rep0.
     * Otherwise check is_rep1.
     */
    uint16_t is_rep0[STATES];

    /*
     * If 0, distance of a repeated match is rep1.
     * Otherwise check is_rep2.
     */
    uint16_t is_rep1[STATES];

    /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
    uint16_t is_rep2[STATES];

    /*
     * If 0, the repeated match has length of one byte. Otherwise
     * the length is decoded from rep_len_dec.
     */
    uint16_t is_rep0_long[STATES][POS_STATES_MAX];

    /*
     * Probability tree for the highest two bits of the match
     * distance. There is a separate probability tree for match
     * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
     */
    uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

    /*
     * Probability trees for additional bits for match distance
     * when the distance is in the range [4, 127].
     */
    uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

    /*
     * Probability tree for the lowest four bits of a match
     * distance that is equal to or greater than 128.
     */
    uint16_t dist_align[ALIGN_SIZE];

    /* Length of a normal match */
    struct lzma_len_dec match_len_dec;

    /* Length of a repeated match */
    struct lzma_len_dec rep_len_dec;

    /* Probabilities of literals */
    uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};

struct lzma2_dec {
    /* Position in xz_dec_lzma2_run(). */
    enum lzma2_seq {
        SEQ_CONTROL,
        SEQ_UNCOMPRESSED_1,
        SEQ_UNCOMPRESSED_2,
        SEQ_COMPRESSED_0,
        SEQ_COMPRESSED_1,
        SEQ_PROPERTIES,
        SEQ_LZMA_PREPARE,
        SEQ_LZMA_RUN,
        SEQ_COPY
    } sequence;

    /* Next position after decoding the compressed size of the chunk. */
    enum lzma2_seq next_sequence;

    /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
    uint32_t uncompressed;

    /*
     * Compressed size of LZMA chunk or compressed/uncompressed
     * size of uncompressed chunk (64 KiB at maximum)
     */
    uint32_t compressed;

    /*
     * True if dictionary reset is needed. This is true before
     * the first chunk (LZMA or uncompressed).
     */
    bool need_dict_reset;

    /*
     * True if new LZMA properties are needed. This is true
     * before the first LZMA chunk.
     */
    bool need_props;

#ifdef XZ_DEC_MICROLZMA
    bool pedantic_microlzma;
#endif
};

struct xz_dec_lzma2 {
    /*
     * The order below is important on x86 to reduce code size and
     * it shouldn't hurt on other platforms. Everything up to and
     * including lzma.pos_mask is in the first 128 bytes on x86-32,
     * which allows using smaller instructions to access those
     * variables. On x86-64, fewer variables fit into the first 128
     * bytes, but this is still the best order without sacrificing
     * readability by splitting the structures.
     */
    struct rc_dec rc;
    struct dictionary dict;
    struct lzma2_dec lzma2;
    struct lzma_dec lzma;

    /*
     * Temporary buffer which holds a small number of input bytes
     * between decoder calls. See lzma2_lzma() for details.
     */
    struct {
        uint32_t size;
        uint8_t buf[3 * LZMA_IN_REQUIRED];
    } temp;
};

/**************
 * Dictionary *
 **************/

/*
 * Reset the dictionary state. When in single-call mode, set up the beginning
 * of the dictionary to point to the actual output buffer.
 */
static void dict_reset(struct dictionary *dict, struct xz_buf *b)
{
    if (DEC_IS_SINGLE(dict->mode)) {
        dict->buf = b->out + b->out_pos;
        dict->end = b->out_size - b->out_pos;
    }

    dict->start = 0;
    dict->pos = 0;
    dict->limit = 0;
    dict->full = 0;
}

/* Set dictionary write limit */
static void dict_limit(struct dictionary *dict, size_t out_max)
{
    if (dict->end - dict->pos <= out_max)
        dict->limit = dict->end;
    else
        dict->limit = dict->pos + out_max;
}

/* Return true if at least one byte can be written into the dictionary. */
static inline bool dict_has_space(const struct dictionary *dict)
{
    return dict->pos < dict->limit;
}

/*
 * Get a byte from the dictionary at the given distance. The distance is
 * assumed to be valid, or as a special case, zero when the dictionary is
 * still empty. This special case is needed for single-call decoding to
 * avoid writing a '\0' to the end of the destination buffer.
 */
static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
{
    size_t offset = dict->pos - dist - 1;

    if (dist >= dict->pos)
        offset += dict->end;

    return dict->full > 0 ? dict->buf[offset] : 0;
}
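
/*
 * Worked example of the wraparound arithmetic in dict_get() (illustrative
 * values): with pos == 3, end == 8, and dist == 5, the subtraction
 * pos - dist - 1 underflows; since dist >= pos, adding end yields
 * offset == 5, i.e. the byte six positions back, at the tail of the
 * circular buffer.
 */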

/*
 * Put one byte into the dictionary. It is assumed that there is space for it.
 */
static inline void dict_put(struct dictionary *dict, uint8_t byte)
{
    dict->buf[dict->pos++] = byte;

    if (dict->full < dict->pos)
        dict->full = dict->pos;
}

/*
 * Repeat the given number of bytes from the given distance. If the distance
 * is invalid, false is returned. On success, true is returned and *len is
 * updated to indicate how many bytes are left to be repeated.
 */
static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
{
    size_t back;
    uint32_t left;

    if (dist >= dict->full || dist >= dict->size)
        return false;

    left = min_t(size_t, dict->limit - dict->pos, *len);
    *len -= left;

    back = dict->pos - dist - 1;
    if (dist >= dict->pos)
        back += dict->end;

    do {
        dict->buf[dict->pos++] = dict->buf[back++];
        if (back == dict->end)
            back = 0;
    } while (--left > 0);

    if (dict->full < dict->pos)
        dict->full = dict->pos;

    return true;
}

/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
                  uint32_t *left)
{
    size_t copy_size;

    while (*left > 0 && b->in_pos < b->in_size
            && b->out_pos < b->out_size) {
        copy_size = min(b->in_size - b->in_pos,
                b->out_size - b->out_pos);
        if (copy_size > dict->end - dict->pos)
            copy_size = dict->end - dict->pos;
        if (copy_size > *left)
            copy_size = *left;

        *left -= copy_size;

        /*
         * If doing in-place decompression in single-call mode and the
         * uncompressed size of the file is larger than the caller
         * thought (i.e. it is invalid input!), the buffers below may
         * overlap and cause undefined behavior with memcpy().
         * With valid inputs memcpy() would be fine here.
         */
        memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
        dict->pos += copy_size;

        if (dict->full < dict->pos)
            dict->full = dict->pos;

        if (DEC_IS_MULTI(dict->mode)) {
            if (dict->pos == dict->end)
                dict->pos = 0;

            /*
             * Like above but for multi-call mode: use memmove()
             * to avoid undefined behavior with invalid input.
             */
            memmove(b->out + b->out_pos, b->in + b->in_pos,
                    copy_size);
        }

        dict->start = dict->pos;

        b->out_pos += copy_size;
        b->in_pos += copy_size;
    }
}

#ifdef XZ_DEC_MICROLZMA
#   define DICT_FLUSH_SUPPORTS_SKIPPING true
#else
#   define DICT_FLUSH_SUPPORTS_SKIPPING false
#endif

/*
 * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because the caller uses
 * dict_limit() before decoding data into the dictionary.
 */
static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
{
    size_t copy_size = dict->pos - dict->start;

    if (DEC_IS_MULTI(dict->mode)) {
        if (dict->pos == dict->end)
            dict->pos = 0;

        /*
         * These buffers cannot overlap even if doing in-place
         * decompression because in multi-call mode dict->buf
         * has been allocated by us in this file; it's not
         * provided by the caller like in single-call mode.
         *
         * With MicroLZMA, b->out can be NULL to skip bytes that
         * the caller doesn't need. This cannot be done with XZ
         * because it would break BCJ filters.
         */
        if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
            memcpy(b->out + b->out_pos, dict->buf + dict->start,
                    copy_size);
    }

    dict->start = dict->pos;
    b->out_pos += copy_size;
    return copy_size;
}

/*****************
 * Range decoder *
 *****************/

/* Reset the range decoder. */
static void rc_reset(struct rc_dec *rc)
{
    rc->range = (uint32_t)-1;
    rc->code = 0;
    rc->init_bytes_left = RC_INIT_BYTES;
}

/*
 * Read the first five initial bytes into rc->code if they haven't been
 * read already. (Yes, the first byte gets completely ignored.)
 */
static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
{
    while (rc->init_bytes_left > 0) {
        if (b->in_pos == b->in_size)
            return false;

        rc->code = (rc->code << 8) + b->in[b->in_pos++];
        --rc->init_bytes_left;
    }

    return true;
}

/* Return true if there may not be enough input for the next decoding loop. */
static inline bool rc_limit_exceeded(const struct rc_dec *rc)
{
    return rc->in_pos > rc->in_limit;
}

/*
 * Return true if it is possible (from the point of view of the range
 * decoder) that we have reached the end of the LZMA chunk.
 */
static inline bool rc_is_finished(const struct rc_dec *rc)
{
    return rc->code == 0;
}

/* Read the next input byte if needed. */
static __always_inline void rc_normalize(struct rc_dec *rc)
{
    if (rc->range < RC_TOP_VALUE) {
        rc->range <<= RC_SHIFT_BITS;
        rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
    }
}

/*
 * Decode one bit. In some versions, this function has been split into
 * three functions so that the compiler could more easily avoid an extra
 * branch. In this particular version of the LZMA decoder, this doesn't
 * seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases by 10-15 %. (GCC 4.3 doesn't
 * care, and it generates 10-20 % faster code than GCC 3.x from this file
 * anyway.)
 */
static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
{
    uint32_t bound;
    int bit;

    rc_normalize(rc);
    bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
    if (rc->code < bound) {
        rc->range = bound;
        *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
        bit = 0;
    } else {
        rc->range -= bound;
        rc->code -= bound;
        *prob -= *prob >> RC_MOVE_BITS;
        bit = 1;
    }

    return bit;
}
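
/*
 * Worked example of one rc_bit() step, assuming the usual LZMA constants
 * (RC_BIT_MODEL_TOTAL == 2048, RC_MOVE_BITS == 5): if *prob == 1024,
 * both bit values are considered equally likely and bound is about half
 * of range. Decoding a 0 shrinks the range to the lower half and adapts
 * the model toward 0: *prob += (2048 - 1024) >> 5, giving 1056.
 * Decoding a 1 takes the upper half and gives *prob == 992.
 */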

/* Decode a bittree starting from the most significant bit. */
static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
                       uint16_t *probs, uint32_t limit)
{
    uint32_t symbol = 1;

    do {
        if (rc_bit(rc, &probs[symbol]))
            symbol = (symbol << 1) + 1;
        else
            symbol <<= 1;
    } while (symbol < limit);

    return symbol;
}
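
/*
 * Example: decoding an 8-bit literal calls rc_bittree() with
 * limit == 0x100. The symbol starts at 1 and gains one decoded bit per
 * iteration, so after eight bits it lies in [0x100, 0x1FF]; the caller
 * drops the marker bit (e.g. by casting to uint8_t) to get the byte.
 */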

/* Decode a bittree starting from the least significant bit. */
static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
                           uint16_t *probs,
                           uint32_t *dest, uint32_t limit)
{
    uint32_t symbol = 1;
    uint32_t i = 0;

    do {
        if (rc_bit(rc, &probs[symbol])) {
            symbol = (symbol << 1) + 1;
            *dest += 1 << i;
        } else {
            symbol <<= 1;
        }
    } while (++i < limit);
}

/* Decode direct bits (fixed fifty-fifty probability) */
static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
    uint32_t mask;

    do {
        rc_normalize(rc);
        rc->range >>= 1;
        rc->code -= rc->range;
        mask = (uint32_t)0 - (rc->code >> 31);
        rc->code += rc->range & mask;
        *dest = (*dest << 1) + (mask + 1);
    } while (--limit > 0);
}
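
/*
 * The branchless trick in rc_direct(): after code -= range, the top bit
 * of code is set exactly when the subtraction wrapped around, i.e. the
 * decoded bit is 0. Then mask == 0xFFFFFFFF, range is added back to
 * code, and (mask + 1) == 0 is shifted into *dest; otherwise mask == 0
 * and a 1 bit is appended.
 */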

/********
 * LZMA *
 ********/

/* Get pointer to literal coder probability array. */
static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
{
    uint32_t prev_byte = dict_get(&s->dict, 0);
    uint32_t low = prev_byte >> (8 - s->lzma.lc);
    uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
    return s->lzma.literal[low + high];
}

/* Decode a literal (one 8-bit byte) */
static void lzma_literal(struct xz_dec_lzma2 *s)
{
    uint16_t *probs;
    uint32_t symbol;
    uint32_t match_byte;
    uint32_t match_bit;
    uint32_t offset;
    uint32_t i;

    probs = lzma_literal_probs(s);

    if (lzma_state_is_literal(s->lzma.state)) {
        symbol = rc_bittree(&s->rc, probs, 0x100);
    } else {
        symbol = 1;
        match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
        offset = 0x100;

        do {
            match_bit = match_byte & offset;
            match_byte <<= 1;
            i = offset + match_bit + symbol;

            if (rc_bit(&s->rc, &probs[i])) {
                symbol = (symbol << 1) + 1;
                offset &= match_bit;
            } else {
                symbol <<= 1;
                offset &= ~match_bit;
            }
        } while (symbol < 0x100);
    }

    dict_put(&s->dict, (uint8_t)symbol);
    lzma_state_literal(&s->lzma.state);
}

/* Decode the length of the match into s->lzma.len. */
static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
             uint32_t pos_state)
{
    uint16_t *probs;
    uint32_t limit;

    if (!rc_bit(&s->rc, &l->choice)) {
        probs = l->low[pos_state];
        limit = LEN_LOW_SYMBOLS;
        s->lzma.len = MATCH_LEN_MIN;
    } else {
        if (!rc_bit(&s->rc, &l->choice2)) {
            probs = l->mid[pos_state];
            limit = LEN_MID_SYMBOLS;
            s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
        } else {
            probs = l->high;
            limit = LEN_HIGH_SYMBOLS;
            s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
                    + LEN_MID_SYMBOLS;
        }
    }

    s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
}
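
/*
 * Worked example, assuming the usual LEN_LOW_SYMBOLS == LEN_MID_SYMBOLS
 * == 8: a match length of 12 is coded as choice == 1 and choice2 == 0,
 * which puts the base at MATCH_LEN_MIN + LEN_LOW_SYMBOLS == 10; the mid
 * bittree then yields symbol 8 + 2, the "- limit" above removes the
 * marker bit, and 10 + 2 == 12.
 */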

/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
    uint16_t *probs;
    uint32_t dist_slot;
    uint32_t limit;

    lzma_state_match(&s->lzma.state);

    s->lzma.rep3 = s->lzma.rep2;
    s->lzma.rep2 = s->lzma.rep1;
    s->lzma.rep1 = s->lzma.rep0;

    lzma_len(s, &s->lzma.match_len_dec, pos_state);

    probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
    dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

    if (dist_slot < DIST_MODEL_START) {
        s->lzma.rep0 = dist_slot;
    } else {
        limit = (dist_slot >> 1) - 1;
        s->lzma.rep0 = 2 + (dist_slot & 1);

        if (dist_slot < DIST_MODEL_END) {
            s->lzma.rep0 <<= limit;
            probs = s->lzma.dist_special + s->lzma.rep0
                    - dist_slot - 1;
            rc_bittree_reverse(&s->rc, probs,
                    &s->lzma.rep0, limit);
        } else {
            rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
            s->lzma.rep0 <<= ALIGN_BITS;
            rc_bittree_reverse(&s->rc, s->lzma.dist_align,
                    &s->lzma.rep0, ALIGN_BITS);
        }
    }
}
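
/*
 * Worked example of the distance decoding above: slots below
 * DIST_MODEL_START encode the distance directly (0-3). dist_slot == 6
 * gives limit == 2 extra bits and a base of (2 + 0) << 2 == 8, so it
 * covers distances [8, 11], with the low bits coming from the reverse
 * bittree. Slots at or above DIST_MODEL_END switch to direct bits plus
 * the four aligned bits from dist_align.
 */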

/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
    uint32_t tmp;

    if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
        if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
                s->lzma.state][pos_state])) {
            lzma_state_short_rep(&s->lzma.state);
            s->lzma.len = 1;
            return;
        }
    } else {
        if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
            tmp = s->lzma.rep1;
        } else {
            if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
                tmp = s->lzma.rep2;
            } else {
                tmp = s->lzma.rep3;
                s->lzma.rep3 = s->lzma.rep2;
            }

            s->lzma.rep2 = s->lzma.rep1;
        }

        s->lzma.rep1 = s->lzma.rep0;
        s->lzma.rep0 = tmp;
    }

    lzma_state_long_rep(&s->lzma.state);
    lzma_len(s, &s->lzma.rep_len_dec, pos_state);
}

/* LZMA decoder core */
static bool lzma_main(struct xz_dec_lzma2 *s)
{
    uint32_t pos_state;

    /*
     * If the dictionary limit was reached during the previous call, try
     * to finish the possibly pending repeat in the dictionary.
     */
    if (dict_has_space(&s->dict) && s->lzma.len > 0)
        dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);

    /*
     * Decode more LZMA symbols. One iteration may consume up to
     * LZMA_IN_REQUIRED - 1 bytes.
     */
    while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
        pos_state = s->dict.pos & s->lzma.pos_mask;

        if (!rc_bit(&s->rc, &s->lzma.is_match[
                s->lzma.state][pos_state])) {
            lzma_literal(s);
        } else {
            if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
                lzma_rep_match(s, pos_state);
            else
                lzma_match(s, pos_state);

            if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
                return false;
        }
    }

    /*
     * Having the range decoder always normalized when we are outside
     * this function makes it easier to correctly handle end of the chunk.
     */
    rc_normalize(&s->rc);

    return true;
}

/*
 * Reset the LZMA decoder and range decoder state. The dictionary is not
 * reset here, because the LZMA state may be reset without resetting the
 * dictionary.
 */
static void lzma_reset(struct xz_dec_lzma2 *s)
{
    uint16_t *probs;
    size_t i;

    s->lzma.state = STATE_LIT_LIT;
    s->lzma.rep0 = 0;
    s->lzma.rep1 = 0;
    s->lzma.rep2 = 0;
    s->lzma.rep3 = 0;
    s->lzma.len = 0;

    /*
     * All probabilities are initialized to the same value. This hack
     * makes the code smaller by avoiding a separate loop for each
     * probability array.
     *
     * This could be optimized so that only the part of the literal
     * probabilities that is actually required gets initialized. In the
     * common case we would write 12 KiB less.
     */
    probs = s->lzma.is_match[0];
    for (i = 0; i < PROBS_TOTAL; ++i)
        probs[i] = RC_BIT_MODEL_TOTAL / 2;

    rc_reset(&s->rc);
}

/*
 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 * from the decoded lp and pb values. On success, the LZMA decoder state is
 * reset and true is returned.
 */
static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
{
    if (props > (4 * 5 + 4) * 9 + 8)
        return false;

    s->lzma.pos_mask = 0;
    while (props >= 9 * 5) {
        props -= 9 * 5;
        ++s->lzma.pos_mask;
    }

    s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;

    s->lzma.literal_pos_mask = 0;
    while (props >= 9) {
        props -= 9;
        ++s->lzma.literal_pos_mask;
    }

    s->lzma.lc = props;

    if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
        return false;

    s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;

    lzma_reset(s);

    return true;
}
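
/*
 * Worked example: the props byte encodes (pb * 5 + lp) * 9 + lc. The
 * common default 0x5D (93) decodes as 93 == (2 * 5 + 0) * 9 + 3, i.e.
 * pb = 2, lp = 0, lc = 3, which gives pos_mask == 3 and
 * literal_pos_mask == 0. The maximum accepted value is
 * (4 * 5 + 4) * 9 + 8 == 224.
 */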

/*********
 * LZMA2 *
 *********/

/*
 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 * wrapper function takes care of making the LZMA decoder's assumption safe.
 *
 * As long as there is plenty of input left to be decoded in the current LZMA
 * chunk, we decode directly from the caller-supplied input buffer until
 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied
 * into s->temp.buf, which (hopefully) gets filled on the next call to this
 * function. We decode a few bytes from the temporary buffer so that we can
 * continue decoding from the caller-supplied input buffer again.
 */
static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
{
    size_t in_avail;
    uint32_t tmp;

    in_avail = b->in_size - b->in_pos;
    if (s->temp.size > 0 || s->lzma2.compressed == 0) {
        tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
        if (tmp > s->lzma2.compressed - s->temp.size)
            tmp = s->lzma2.compressed - s->temp.size;
        if (tmp > in_avail)
            tmp = in_avail;

        memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);

        if (s->temp.size + tmp == s->lzma2.compressed) {
            memzero(s->temp.buf + s->temp.size + tmp,
                    sizeof(s->temp.buf)
                        - s->temp.size - tmp);
            s->rc.in_limit = s->temp.size + tmp;
        } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
            s->temp.size += tmp;
            b->in_pos += tmp;
            return true;
        } else {
            s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
        }

        s->rc.in = s->temp.buf;
        s->rc.in_pos = 0;

        if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
            return false;

        s->lzma2.compressed -= s->rc.in_pos;

        if (s->rc.in_pos < s->temp.size) {
            s->temp.size -= s->rc.in_pos;
            memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
                    s->temp.size);
            return true;
        }

        b->in_pos += s->rc.in_pos - s->temp.size;
        s->temp.size = 0;
    }

    in_avail = b->in_size - b->in_pos;
    if (in_avail >= LZMA_IN_REQUIRED) {
        s->rc.in = b->in;
        s->rc.in_pos = b->in_pos;

        if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
            s->rc.in_limit = b->in_pos + s->lzma2.compressed;
        else
            s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;

        if (!lzma_main(s))
            return false;

        in_avail = s->rc.in_pos - b->in_pos;
        if (in_avail > s->lzma2.compressed)
            return false;

        s->lzma2.compressed -= in_avail;
        b->in_pos = s->rc.in_pos;
    }

    in_avail = b->in_size - b->in_pos;
    if (in_avail < LZMA_IN_REQUIRED) {
        if (in_avail > s->lzma2.compressed)
            in_avail = s->lzma2.compressed;

        memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
        s->temp.size = in_avail;
        b->in_pos += in_avail;
    }

    return true;
}

/*
 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
 * decoding or copying of uncompressed chunks to other functions.
 */
XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
                       struct xz_buf *b)
{
    uint32_t tmp;

    while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
        switch (s->lzma2.sequence) {
        case SEQ_CONTROL:
            /*
             * LZMA2 control byte
             *
             * Exact values:
             *   0x00   End marker
             *   0x01   Dictionary reset followed by
             *          an uncompressed chunk
             *   0x02   Uncompressed chunk (no dictionary reset)
             *
             * Highest three bits (s->control & 0xE0):
             *   0xE0   Dictionary reset, new properties and state
             *          reset, followed by LZMA compressed chunk
             *   0xC0   New properties and state reset, followed
             *          by LZMA compressed chunk (no dictionary
             *          reset)
             *   0xA0   State reset using old properties,
             *          followed by LZMA compressed chunk (no
             *          dictionary reset)
             *   0x80   LZMA chunk (no dictionary or state reset)
             *
             * For LZMA compressed chunks, the lowest five bits
             * (s->control & 0x1F) are the highest bits of the
             * uncompressed size (bits 16-20).
             *
             * A new LZMA2 stream must begin with a dictionary
             * reset. The first LZMA chunk must set new
             * properties and reset the LZMA state.
             *
             * Values that don't match anything described above
             * are invalid and we return XZ_DATA_ERROR.
             */
            tmp = b->in[b->in_pos++];

            if (tmp == 0x00)
                return XZ_STREAM_END;

            if (tmp >= 0xE0 || tmp == 0x01) {
                s->lzma2.need_props = true;
                s->lzma2.need_dict_reset = false;
                dict_reset(&s->dict, b);
            } else if (s->lzma2.need_dict_reset) {
                return XZ_DATA_ERROR;
            }

            if (tmp >= 0x80) {
                s->lzma2.uncompressed = (tmp & 0x1F) << 16;
                s->lzma2.sequence = SEQ_UNCOMPRESSED_1;

                if (tmp >= 0xC0) {
                    /*
                     * When there are new properties,
                     * state reset is done at
                     * SEQ_PROPERTIES.
                     */
                    s->lzma2.need_props = false;
                    s->lzma2.next_sequence
                            = SEQ_PROPERTIES;

                } else if (s->lzma2.need_props) {
                    return XZ_DATA_ERROR;

                } else {
                    s->lzma2.next_sequence
                            = SEQ_LZMA_PREPARE;
                    if (tmp >= 0xA0)
                        lzma_reset(s);
                }
            } else {
                if (tmp > 0x02)
                    return XZ_DATA_ERROR;

                s->lzma2.sequence = SEQ_COMPRESSED_0;
                s->lzma2.next_sequence = SEQ_COPY;
            }

            break;

        case SEQ_UNCOMPRESSED_1:
            s->lzma2.uncompressed
                    += (uint32_t)b->in[b->in_pos++] << 8;
            s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
            break;

        case SEQ_UNCOMPRESSED_2:
            s->lzma2.uncompressed
                    += (uint32_t)b->in[b->in_pos++] + 1;
            s->lzma2.sequence = SEQ_COMPRESSED_0;
            break;

        case SEQ_COMPRESSED_0:
            s->lzma2.compressed
                    = (uint32_t)b->in[b->in_pos++] << 8;
            s->lzma2.sequence = SEQ_COMPRESSED_1;
            break;

        case SEQ_COMPRESSED_1:
            s->lzma2.compressed
                    += (uint32_t)b->in[b->in_pos++] + 1;
            s->lzma2.sequence = s->lzma2.next_sequence;
            break;

        case SEQ_PROPERTIES:
            if (!lzma_props(s, b->in[b->in_pos++]))
                return XZ_DATA_ERROR;

            s->lzma2.sequence = SEQ_LZMA_PREPARE;

            fallthrough;

        case SEQ_LZMA_PREPARE:
            if (s->lzma2.compressed < RC_INIT_BYTES)
                return XZ_DATA_ERROR;

            if (!rc_read_init(&s->rc, b))
                return XZ_OK;

            s->lzma2.compressed -= RC_INIT_BYTES;
            s->lzma2.sequence = SEQ_LZMA_RUN;

            fallthrough;

        case SEQ_LZMA_RUN:
            /*
             * Set the dictionary limit to indicate how much we
             * want to be decoded at maximum. Decode new data
             * into the dictionary. Flush the new data from the
             * dictionary to b->out. Check if we finished
             * decoding this chunk. In case the dictionary got
             * full but we didn't fill the output buffer yet, we
             * may run this loop multiple times without changing
             * s->lzma2.sequence.
             */
            dict_limit(&s->dict, min_t(size_t,
                    b->out_size - b->out_pos,
                    s->lzma2.uncompressed));
            if (!lzma2_lzma(s, b))
                return XZ_DATA_ERROR;

            s->lzma2.uncompressed -= dict_flush(&s->dict, b);

            if (s->lzma2.uncompressed == 0) {
                if (s->lzma2.compressed > 0 || s->lzma.len > 0
                        || !rc_is_finished(&s->rc))
                    return XZ_DATA_ERROR;

                rc_reset(&s->rc);
                s->lzma2.sequence = SEQ_CONTROL;

            } else if (b->out_pos == b->out_size
                    || (b->in_pos == b->in_size
                        && s->temp.size
                        < s->lzma2.compressed)) {
                return XZ_OK;
            }

            break;

        case SEQ_COPY:
            dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
            if (s->lzma2.compressed > 0)
                return XZ_OK;

            s->lzma2.sequence = SEQ_CONTROL;
            break;
        }
    }

    return XZ_OK;
}
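
/*
 * Worked example of the chunk header parsing above (illustrative bytes):
 * the sequence 0xE0 0x00 0xFF 0x01 0x23 0x5D starts an LZMA chunk with a
 * dictionary reset and new properties. The uncompressed size becomes
 * ((0xE0 & 0x1F) << 16) + (0x00 << 8) + 0xFF + 1 == 256 bytes, the
 * compressed size becomes (0x01 << 8) + 0x23 + 1 == 0x124 bytes, and
 * 0x5D is the properties byte handled in SEQ_PROPERTIES.
 */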

XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
                           uint32_t dict_max)
{
    struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
    if (s == NULL)
        return NULL;

    s->dict.mode = mode;
    s->dict.size_max = dict_max;

    if (DEC_IS_PREALLOC(mode)) {
        s->dict.buf = vmalloc(dict_max);
        if (s->dict.buf == NULL) {
            kfree(s);
            return NULL;
        }
    } else if (DEC_IS_DYNALLOC(mode)) {
        s->dict.buf = NULL;
        s->dict.allocated = 0;
    }

    return s;
}

XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
{
    /* This limits dictionary size to 3 GiB to keep parsing simpler. */
    if (props > 39)
        return XZ_OPTIONS_ERROR;

    s->dict.size = 2 + (props & 1);
    s->dict.size <<= (props >> 1) + 11;
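
    /*
     * Worked examples of the formula above: props == 0 gives
     * 2 << 11 == 4 KiB, props == 1 gives 3 << 11 == 6 KiB, and the
     * maximum props == 39 gives 3 << 30 == 3 GiB.
     */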

    if (DEC_IS_MULTI(s->dict.mode)) {
        if (s->dict.size > s->dict.size_max)
            return XZ_MEMLIMIT_ERROR;

        s->dict.end = s->dict.size;

        if (DEC_IS_DYNALLOC(s->dict.mode)) {
            if (s->dict.allocated < s->dict.size) {
                s->dict.allocated = s->dict.size;
                vfree(s->dict.buf);
                s->dict.buf = vmalloc(s->dict.size);
                if (s->dict.buf == NULL) {
                    s->dict.allocated = 0;
                    return XZ_MEM_ERROR;
                }
            }
        }
    }

    s->lzma2.sequence = SEQ_CONTROL;
    s->lzma2.need_dict_reset = true;

    s->temp.size = 0;

    return XZ_OK;
}

XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
{
    if (DEC_IS_MULTI(s->dict.mode))
        vfree(s->dict.buf);

    kfree(s);
}
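
/*
 * A minimal single-call usage sketch (illustrative only; these helpers
 * are normally driven by the higher-level XZ stream decoder, buffer
 * names and dict_props are placeholders, and error handling is omitted):
 *
 *     struct xz_buf b = {
 *         .in = comp_buf, .in_pos = 0, .in_size = comp_size,
 *         .out = out_buf, .out_pos = 0, .out_size = out_size,
 *     };
 *     struct xz_dec_lzma2 *s = xz_dec_lzma2_create(XZ_SINGLE, 0);
 *
 *     if (s != NULL) {
 *         if (xz_dec_lzma2_reset(s, dict_props) == XZ_OK
 *                 && xz_dec_lzma2_run(s, &b) == XZ_STREAM_END) {
 *             // b.out_pos bytes of output were produced
 *         }
 *         xz_dec_lzma2_end(s);
 *     }
 */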

#ifdef XZ_DEC_MICROLZMA
/* This is a wrapper struct to have a nice struct name in the public API. */
struct xz_dec_microlzma {
    struct xz_dec_lzma2 s;
};

enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
                 struct xz_buf *b)
{
    struct xz_dec_lzma2 *s = &s_ptr->s;

    /*
     * sequence is SEQ_PROPERTIES before the first input byte,
     * SEQ_LZMA_PREPARE until a total of five bytes have been read,
     * and SEQ_LZMA_RUN for the rest of the input stream.
     */
    if (s->lzma2.sequence != SEQ_LZMA_RUN) {
        if (s->lzma2.sequence == SEQ_PROPERTIES) {
            /* One byte is needed for the props. */
            if (b->in_pos >= b->in_size)
                return XZ_OK;

            /*
             * Don't increment b->in_pos here. The same byte is
             * also passed to rc_read_init() which will ignore it.
             */
            if (!lzma_props(s, ~b->in[b->in_pos]))
                return XZ_DATA_ERROR;

            s->lzma2.sequence = SEQ_LZMA_PREPARE;
        }

        /*
         * xz_dec_microlzma_reset() doesn't validate the compressed
         * size so we do it here. We have to limit the maximum size
         * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
         * round number and much more than users of this code should
         * ever need.
         */
        if (s->lzma2.compressed < RC_INIT_BYTES
                || s->lzma2.compressed > (3U << 30))
            return XZ_DATA_ERROR;

        if (!rc_read_init(&s->rc, b))
            return XZ_OK;

        s->lzma2.compressed -= RC_INIT_BYTES;
        s->lzma2.sequence = SEQ_LZMA_RUN;

        dict_reset(&s->dict, b);
    }

    /* This is to allow increasing b->out_size between calls. */
    if (DEC_IS_SINGLE(s->dict.mode))
        s->dict.end = b->out_size - b->out_pos;

    while (true) {
        dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
                       s->lzma2.uncompressed));

        if (!lzma2_lzma(s, b))
            return XZ_DATA_ERROR;

        s->lzma2.uncompressed -= dict_flush(&s->dict, b);

        if (s->lzma2.uncompressed == 0) {
            if (s->lzma2.pedantic_microlzma) {
                if (s->lzma2.compressed > 0 || s->lzma.len > 0
                        || !rc_is_finished(&s->rc))
                    return XZ_DATA_ERROR;
            }

            return XZ_STREAM_END;
        }

        if (b->out_pos == b->out_size)
            return XZ_OK;

        if (b->in_pos == b->in_size
                && s->temp.size < s->lzma2.compressed)
            return XZ_OK;
    }
}

struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
                        uint32_t dict_size)
{
    struct xz_dec_microlzma *s;

    /* Restrict dict_size to the same range as in the LZMA2 code. */
    if (dict_size < 4096 || dict_size > (3U << 30))
        return NULL;

    s = kmalloc(sizeof(*s), GFP_KERNEL);
    if (s == NULL)
        return NULL;

    s->s.dict.mode = mode;
    s->s.dict.size = dict_size;

    if (DEC_IS_MULTI(mode)) {
        s->s.dict.end = dict_size;

        s->s.dict.buf = vmalloc(dict_size);
        if (s->s.dict.buf == NULL) {
            kfree(s);
            return NULL;
        }
    }

    return s;
}

void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
                uint32_t uncomp_size, int uncomp_size_is_exact)
{
    /*
     * comp_size is validated in xz_dec_microlzma_run().
     * uncomp_size can safely be anything.
     */
    s->s.lzma2.compressed = comp_size;
    s->s.lzma2.uncompressed = uncomp_size;
    s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;

    s->s.lzma2.sequence = SEQ_PROPERTIES;
    s->s.temp.size = 0;
}
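
/*
 * A minimal MicroLZMA usage sketch (illustrative only; buffer names and
 * sizes are placeholders, and error handling is omitted):
 *
 *     struct xz_buf b = {
 *         .in = comp_buf, .in_pos = 0, .in_size = comp_size,
 *         .out = out_buf, .out_pos = 0, .out_size = out_size,
 *     };
 *     struct xz_dec_microlzma *s;
 *
 *     s = xz_dec_microlzma_alloc(XZ_SINGLE, 1U << 20);
 *     if (s != NULL) {
 *         xz_dec_microlzma_reset(s, comp_size, out_size, 1);
 *         if (xz_dec_microlzma_run(s, &b) == XZ_STREAM_END) {
 *             // b.out_pos bytes of output were produced
 *         }
 *         xz_dec_microlzma_end(s);
 *     }
 */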

void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
{
    if (DEC_IS_MULTI(s->s.dict.mode))
        vfree(s->s.dict.buf);

    kfree(s);
}
#endif