Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Branch/Call/Jump (BCJ) filter decoders
0003  *
0004  * Authors: Lasse Collin <lasse.collin@tukaani.org>
0005  *          Igor Pavlov <https://7-zip.org/>
0006  *
0007  * This file has been put into the public domain.
0008  * You can do whatever you want with this file.
0009  */
0010 
0011 #include "xz_private.h"
0012 
0013 /*
0014  * The rest of the file is inside this ifdef. It makes things a little more
0015  * convenient when building without support for any BCJ filters.
0016  */
0017 #ifdef XZ_DEC_BCJ
0018 
0019 struct xz_dec_bcj {
0020     /* Type of the BCJ filter being used */
0021     enum {
0022         BCJ_X86 = 4,        /* x86 or x86-64 */
0023         BCJ_POWERPC = 5,    /* Big endian only */
0024         BCJ_IA64 = 6,       /* Big or little endian */
0025         BCJ_ARM = 7,        /* Little endian only */
0026         BCJ_ARMTHUMB = 8,   /* Little endian only */
0027         BCJ_SPARC = 9       /* Big or little endian */
0028     } type;
0029 
0030     /*
0031      * Return value of the next filter in the chain. We need to preserve
0032      * this information across calls, because we must not call the next
0033      * filter anymore once it has returned XZ_STREAM_END.
0034      */
0035     enum xz_ret ret;
0036 
0037     /* True if we are operating in single-call mode. */
0038     bool single_call;
0039 
0040     /*
0041      * Absolute position relative to the beginning of the uncompressed
0042      * data (in a single .xz Block). We care only about the lowest 32
0043      * bits so this doesn't need to be uint64_t even with big files.
0044      */
0045     uint32_t pos;
0046 
0047     /* x86 filter state */
0048     uint32_t x86_prev_mask;
0049 
0050     /* Temporary space to hold the variables from struct xz_buf */
0051     uint8_t *out;
0052     size_t out_pos;
0053     size_t out_size;
0054 
0055     struct {
0056         /* Amount of already filtered data in the beginning of buf */
0057         size_t filtered;
0058 
0059         /* Total amount of data currently stored in buf  */
0060         size_t size;
0061 
0062         /*
0063          * Buffer to hold a mix of filtered and unfiltered data. This
0064          * needs to be big enough to hold Alignment + 2 * Look-ahead:
0065          *
0066          * Type         Alignment   Look-ahead
0067          * x86              1           4
0068          * PowerPC          4           0
0069          * IA-64           16           0
0070          * ARM              4           0
0071          * ARM-Thumb        2           2
0072          * SPARC            4           0
0073          */
0074         uint8_t buf[16];
0075     } temp;
0076 };
0077 
0078 #ifdef XZ_DEC_X86
0079 /*
0080  * This is used to test the most significant byte of a memory address
0081  * in an x86 instruction.
0082  */
0083 static inline int bcj_x86_test_msbyte(uint8_t b)
0084 {
0085     return b == 0x00 || b == 0xFF;
0086 }
0087 
0088 static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
0089 {
0090     static const bool mask_to_allowed_status[8]
0091         = { true, true, true, false, true, false, false, false };
0092 
0093     static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
0094 
0095     size_t i;
0096     size_t prev_pos = (size_t)-1;
0097     uint32_t prev_mask = s->x86_prev_mask;
0098     uint32_t src;
0099     uint32_t dest;
0100     uint32_t j;
0101     uint8_t b;
0102 
0103     if (size <= 4)
0104         return 0;
0105 
0106     size -= 4;
0107     for (i = 0; i < size; ++i) {
0108         if ((buf[i] & 0xFE) != 0xE8)
0109             continue;
0110 
0111         prev_pos = i - prev_pos;
0112         if (prev_pos > 3) {
0113             prev_mask = 0;
0114         } else {
0115             prev_mask = (prev_mask << (prev_pos - 1)) & 7;
0116             if (prev_mask != 0) {
0117                 b = buf[i + 4 - mask_to_bit_num[prev_mask]];
0118                 if (!mask_to_allowed_status[prev_mask]
0119                         || bcj_x86_test_msbyte(b)) {
0120                     prev_pos = i;
0121                     prev_mask = (prev_mask << 1) | 1;
0122                     continue;
0123                 }
0124             }
0125         }
0126 
0127         prev_pos = i;
0128 
0129         if (bcj_x86_test_msbyte(buf[i + 4])) {
0130             src = get_unaligned_le32(buf + i + 1);
0131             while (true) {
0132                 dest = src - (s->pos + (uint32_t)i + 5);
0133                 if (prev_mask == 0)
0134                     break;
0135 
0136                 j = mask_to_bit_num[prev_mask] * 8;
0137                 b = (uint8_t)(dest >> (24 - j));
0138                 if (!bcj_x86_test_msbyte(b))
0139                     break;
0140 
0141                 src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
0142             }
0143 
0144             dest &= 0x01FFFFFF;
0145             dest |= (uint32_t)0 - (dest & 0x01000000);
0146             put_unaligned_le32(dest, buf + i + 1);
0147             i += 4;
0148         } else {
0149             prev_mask = (prev_mask << 1) | 1;
0150         }
0151     }
0152 
0153     prev_pos = i - prev_pos;
0154     s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
0155     return i;
0156 }
0157 #endif
0158 
0159 #ifdef XZ_DEC_POWERPC
0160 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
0161 {
0162     size_t i;
0163     uint32_t instr;
0164 
0165     for (i = 0; i + 4 <= size; i += 4) {
0166         instr = get_unaligned_be32(buf + i);
0167         if ((instr & 0xFC000003) == 0x48000001) {
0168             instr &= 0x03FFFFFC;
0169             instr -= s->pos + (uint32_t)i;
0170             instr &= 0x03FFFFFC;
0171             instr |= 0x48000001;
0172             put_unaligned_be32(instr, buf + i);
0173         }
0174     }
0175 
0176     return i;
0177 }
0178 #endif
0179 
0180 #ifdef XZ_DEC_IA64
0181 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
0182 {
0183     static const uint8_t branch_table[32] = {
0184         0, 0, 0, 0, 0, 0, 0, 0,
0185         0, 0, 0, 0, 0, 0, 0, 0,
0186         4, 4, 6, 6, 0, 0, 7, 7,
0187         4, 4, 0, 0, 4, 4, 0, 0
0188     };
0189 
0190     /*
0191      * The local variables take a little bit stack space, but it's less
0192      * than what LZMA2 decoder takes, so it doesn't make sense to reduce
0193      * stack usage here without doing that for the LZMA2 decoder too.
0194      */
0195 
0196     /* Loop counters */
0197     size_t i;
0198     size_t j;
0199 
0200     /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
0201     uint32_t slot;
0202 
0203     /* Bitwise offset of the instruction indicated by slot */
0204     uint32_t bit_pos;
0205 
0206     /* bit_pos split into byte and bit parts */
0207     uint32_t byte_pos;
0208     uint32_t bit_res;
0209 
0210     /* Address part of an instruction */
0211     uint32_t addr;
0212 
0213     /* Mask used to detect which instructions to convert */
0214     uint32_t mask;
0215 
0216     /* 41-bit instruction stored somewhere in the lowest 48 bits */
0217     uint64_t instr;
0218 
0219     /* Instruction normalized with bit_res for easier manipulation */
0220     uint64_t norm;
0221 
0222     for (i = 0; i + 16 <= size; i += 16) {
0223         mask = branch_table[buf[i] & 0x1F];
0224         for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
0225             if (((mask >> slot) & 1) == 0)
0226                 continue;
0227 
0228             byte_pos = bit_pos >> 3;
0229             bit_res = bit_pos & 7;
0230             instr = 0;
0231             for (j = 0; j < 6; ++j)
0232                 instr |= (uint64_t)(buf[i + j + byte_pos])
0233                         << (8 * j);
0234 
0235             norm = instr >> bit_res;
0236 
0237             if (((norm >> 37) & 0x0F) == 0x05
0238                     && ((norm >> 9) & 0x07) == 0) {
0239                 addr = (norm >> 13) & 0x0FFFFF;
0240                 addr |= ((uint32_t)(norm >> 36) & 1) << 20;
0241                 addr <<= 4;
0242                 addr -= s->pos + (uint32_t)i;
0243                 addr >>= 4;
0244 
0245                 norm &= ~((uint64_t)0x8FFFFF << 13);
0246                 norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
0247                 norm |= (uint64_t)(addr & 0x100000)
0248                         << (36 - 20);
0249 
0250                 instr &= (1 << bit_res) - 1;
0251                 instr |= norm << bit_res;
0252 
0253                 for (j = 0; j < 6; j++)
0254                     buf[i + j + byte_pos]
0255                         = (uint8_t)(instr >> (8 * j));
0256             }
0257         }
0258     }
0259 
0260     return i;
0261 }
0262 #endif
0263 
0264 #ifdef XZ_DEC_ARM
0265 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
0266 {
0267     size_t i;
0268     uint32_t addr;
0269 
0270     for (i = 0; i + 4 <= size; i += 4) {
0271         if (buf[i + 3] == 0xEB) {
0272             addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
0273                     | ((uint32_t)buf[i + 2] << 16);
0274             addr <<= 2;
0275             addr -= s->pos + (uint32_t)i + 8;
0276             addr >>= 2;
0277             buf[i] = (uint8_t)addr;
0278             buf[i + 1] = (uint8_t)(addr >> 8);
0279             buf[i + 2] = (uint8_t)(addr >> 16);
0280         }
0281     }
0282 
0283     return i;
0284 }
0285 #endif
0286 
0287 #ifdef XZ_DEC_ARMTHUMB
0288 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
0289 {
0290     size_t i;
0291     uint32_t addr;
0292 
0293     for (i = 0; i + 4 <= size; i += 2) {
0294         if ((buf[i + 1] & 0xF8) == 0xF0
0295                 && (buf[i + 3] & 0xF8) == 0xF8) {
0296             addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
0297                     | ((uint32_t)buf[i] << 11)
0298                     | (((uint32_t)buf[i + 3] & 0x07) << 8)
0299                     | (uint32_t)buf[i + 2];
0300             addr <<= 1;
0301             addr -= s->pos + (uint32_t)i + 4;
0302             addr >>= 1;
0303             buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
0304             buf[i] = (uint8_t)(addr >> 11);
0305             buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
0306             buf[i + 2] = (uint8_t)addr;
0307             i += 2;
0308         }
0309     }
0310 
0311     return i;
0312 }
0313 #endif
0314 
0315 #ifdef XZ_DEC_SPARC
0316 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
0317 {
0318     size_t i;
0319     uint32_t instr;
0320 
0321     for (i = 0; i + 4 <= size; i += 4) {
0322         instr = get_unaligned_be32(buf + i);
0323         if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
0324             instr <<= 2;
0325             instr -= s->pos + (uint32_t)i;
0326             instr >>= 2;
0327             instr = ((uint32_t)0x40000000 - (instr & 0x400000))
0328                     | 0x40000000 | (instr & 0x3FFFFF);
0329             put_unaligned_be32(instr, buf + i);
0330         }
0331     }
0332 
0333     return i;
0334 }
0335 #endif
0336 
0337 /*
0338  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
0339  * of data that got filtered.
0340  *
0341  * NOTE: This is implemented as a switch statement to avoid using function
0342  * pointers, which could be problematic in the kernel boot code, which must
0343  * avoid pointers to static data (at least on x86).
0344  */
0345 static void bcj_apply(struct xz_dec_bcj *s,
0346               uint8_t *buf, size_t *pos, size_t size)
0347 {
0348     size_t filtered;
0349 
0350     buf += *pos;
0351     size -= *pos;
0352 
0353     switch (s->type) {
0354 #ifdef XZ_DEC_X86
0355     case BCJ_X86:
0356         filtered = bcj_x86(s, buf, size);
0357         break;
0358 #endif
0359 #ifdef XZ_DEC_POWERPC
0360     case BCJ_POWERPC:
0361         filtered = bcj_powerpc(s, buf, size);
0362         break;
0363 #endif
0364 #ifdef XZ_DEC_IA64
0365     case BCJ_IA64:
0366         filtered = bcj_ia64(s, buf, size);
0367         break;
0368 #endif
0369 #ifdef XZ_DEC_ARM
0370     case BCJ_ARM:
0371         filtered = bcj_arm(s, buf, size);
0372         break;
0373 #endif
0374 #ifdef XZ_DEC_ARMTHUMB
0375     case BCJ_ARMTHUMB:
0376         filtered = bcj_armthumb(s, buf, size);
0377         break;
0378 #endif
0379 #ifdef XZ_DEC_SPARC
0380     case BCJ_SPARC:
0381         filtered = bcj_sparc(s, buf, size);
0382         break;
0383 #endif
0384     default:
0385         /* Never reached but silence compiler warnings. */
0386         filtered = 0;
0387         break;
0388     }
0389 
0390     *pos += filtered;
0391     s->pos += filtered;
0392 }
0393 
0394 /*
0395  * Flush pending filtered data from temp to the output buffer.
0396  * Move the remaining mixture of possibly filtered and unfiltered
0397  * data to the beginning of temp.
0398  */
0399 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
0400 {
0401     size_t copy_size;
0402 
0403     copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
0404     memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
0405     b->out_pos += copy_size;
0406 
0407     s->temp.filtered -= copy_size;
0408     s->temp.size -= copy_size;
0409     memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
0410 }
0411 
0412 /*
0413  * The BCJ filter functions are primitive in sense that they process the
0414  * data in chunks of 1-16 bytes. To hide this issue, this function does
0415  * some buffering.
0416  */
0417 XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
0418                      struct xz_dec_lzma2 *lzma2,
0419                      struct xz_buf *b)
0420 {
0421     size_t out_start;
0422 
0423     /*
0424      * Flush pending already filtered data to the output buffer. Return
0425      * immediately if we couldn't flush everything, or if the next
0426      * filter in the chain had already returned XZ_STREAM_END.
0427      */
0428     if (s->temp.filtered > 0) {
0429         bcj_flush(s, b);
0430         if (s->temp.filtered > 0)
0431             return XZ_OK;
0432 
0433         if (s->ret == XZ_STREAM_END)
0434             return XZ_STREAM_END;
0435     }
0436 
0437     /*
0438      * If we have more output space than what is currently pending in
0439      * temp, copy the unfiltered data from temp to the output buffer
0440      * and try to fill the output buffer by decoding more data from the
0441      * next filter in the chain. Apply the BCJ filter on the new data
0442      * in the output buffer. If everything cannot be filtered, copy it
0443      * to temp and rewind the output buffer position accordingly.
0444      *
0445      * This needs to be always run when temp.size == 0 to handle a special
0446      * case where the output buffer is full and the next filter has no
0447      * more output coming but hasn't returned XZ_STREAM_END yet.
0448      */
0449     if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
0450         out_start = b->out_pos;
0451         memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
0452         b->out_pos += s->temp.size;
0453 
0454         s->ret = xz_dec_lzma2_run(lzma2, b);
0455         if (s->ret != XZ_STREAM_END
0456                 && (s->ret != XZ_OK || s->single_call))
0457             return s->ret;
0458 
0459         bcj_apply(s, b->out, &out_start, b->out_pos);
0460 
0461         /*
0462          * As an exception, if the next filter returned XZ_STREAM_END,
0463          * we can do that too, since the last few bytes that remain
0464          * unfiltered are meant to remain unfiltered.
0465          */
0466         if (s->ret == XZ_STREAM_END)
0467             return XZ_STREAM_END;
0468 
0469         s->temp.size = b->out_pos - out_start;
0470         b->out_pos -= s->temp.size;
0471         memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
0472 
0473         /*
0474          * If there wasn't enough input to the next filter to fill
0475          * the output buffer with unfiltered data, there's no point
0476          * to try decoding more data to temp.
0477          */
0478         if (b->out_pos + s->temp.size < b->out_size)
0479             return XZ_OK;
0480     }
0481 
0482     /*
0483      * We have unfiltered data in temp. If the output buffer isn't full
0484      * yet, try to fill the temp buffer by decoding more data from the
0485      * next filter. Apply the BCJ filter on temp. Then we hopefully can
0486      * fill the actual output buffer by copying filtered data from temp.
0487      * A mix of filtered and unfiltered data may be left in temp; it will
0488      * be taken care on the next call to this function.
0489      */
0490     if (b->out_pos < b->out_size) {
0491         /* Make b->out{,_pos,_size} temporarily point to s->temp. */
0492         s->out = b->out;
0493         s->out_pos = b->out_pos;
0494         s->out_size = b->out_size;
0495         b->out = s->temp.buf;
0496         b->out_pos = s->temp.size;
0497         b->out_size = sizeof(s->temp.buf);
0498 
0499         s->ret = xz_dec_lzma2_run(lzma2, b);
0500 
0501         s->temp.size = b->out_pos;
0502         b->out = s->out;
0503         b->out_pos = s->out_pos;
0504         b->out_size = s->out_size;
0505 
0506         if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
0507             return s->ret;
0508 
0509         bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
0510 
0511         /*
0512          * If the next filter returned XZ_STREAM_END, we mark that
0513          * everything is filtered, since the last unfiltered bytes
0514          * of the stream are meant to be left as is.
0515          */
0516         if (s->ret == XZ_STREAM_END)
0517             s->temp.filtered = s->temp.size;
0518 
0519         bcj_flush(s, b);
0520         if (s->temp.filtered > 0)
0521             return XZ_OK;
0522     }
0523 
0524     return s->ret;
0525 }
0526 
0527 XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
0528 {
0529     struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
0530     if (s != NULL)
0531         s->single_call = single_call;
0532 
0533     return s;
0534 }
0535 
0536 XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
0537 {
0538     switch (id) {
0539 #ifdef XZ_DEC_X86
0540     case BCJ_X86:
0541 #endif
0542 #ifdef XZ_DEC_POWERPC
0543     case BCJ_POWERPC:
0544 #endif
0545 #ifdef XZ_DEC_IA64
0546     case BCJ_IA64:
0547 #endif
0548 #ifdef XZ_DEC_ARM
0549     case BCJ_ARM:
0550 #endif
0551 #ifdef XZ_DEC_ARMTHUMB
0552     case BCJ_ARMTHUMB:
0553 #endif
0554 #ifdef XZ_DEC_SPARC
0555     case BCJ_SPARC:
0556 #endif
0557         break;
0558 
0559     default:
0560         /* Unsupported Filter ID */
0561         return XZ_OPTIONS_ERROR;
0562     }
0563 
0564     s->type = id;
0565     s->ret = XZ_OK;
0566     s->pos = 0;
0567     s->x86_prev_mask = 0;
0568     s->temp.filtered = 0;
0569     s->temp.size = 0;
0570 
0571     return XZ_OK;
0572 }
0573 
0574 #endif