Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (c) Yann Collet, Facebook, Inc.
0003  * All rights reserved.
0004  *
0005  * This source code is licensed under both the BSD-style license (found in the
0006  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
0007  * in the COPYING file in the root directory of this source tree).
0008  * You may select, at your option, one of the above-listed licenses.
0009  */
0010 
0011  /*-*************************************
0012  *  Dependencies
0013  ***************************************/
0014 #include "zstd_compress_sequences.h"
0015 
0016 /*
0017  * -log2(x / 256) lookup table for x in [0, 256).
0018  * If x == 0: Return 0
0019  * Else: Return floor(-log2(x / 256) * 256)
0020  */
0021 static unsigned const kInverseProbabilityLog256[256] = {
0022     0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
0023     1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
0024     874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
0025     724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
0026     618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
0027     535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
0028     468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
0029     411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
0030     362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
0031     318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
0032     279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
0033     244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
0034     212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
0035     182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
0036     155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
0037     130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
0038     106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
0039     83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
0040     62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
0041     42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
0042     23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
0043     5,    4,    2,    1,
0044 };
0045 
0046 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
0047   void const* ptr = ctable;
0048   U16 const* u16ptr = (U16 const*)ptr;
0049   U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
0050   return maxSymbolValue;
0051 }
0052 
0053 /*
0054  * Returns true if we should use ncount=-1 else we should
0055  * use ncount=1 for low probability symbols instead.
0056  */
0057 static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
0058 {
0059     /* Heuristic: This should cover most blocks <= 16K and
0060      * start to fade out after 16K to about 32K depending on
0061      * comprssibility.
0062      */
0063     return nbSeq >= 2048;
0064 }
0065 
0066 /*
0067  * Returns the cost in bytes of encoding the normalized count header.
0068  * Returns an error if any of the helper functions return an error.
0069  */
0070 static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
0071                               size_t const nbSeq, unsigned const FSELog)
0072 {
0073     BYTE wksp[FSE_NCOUNTBOUND];
0074     S16 norm[MaxSeq + 1];
0075     const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
0076     FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
0077     return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
0078 }
0079 
0080 /*
0081  * Returns the cost in bits of encoding the distribution described by count
0082  * using the entropy bound.
0083  */
0084 static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
0085 {
0086     unsigned cost = 0;
0087     unsigned s;
0088     for (s = 0; s <= max; ++s) {
0089         unsigned norm = (unsigned)((256 * count[s]) / total);
0090         if (count[s] != 0 && norm == 0)
0091             norm = 1;
0092         assert(count[s] < total);
0093         cost += count[s] * kInverseProbabilityLog256[norm];
0094     }
0095     return cost >> 8;
0096 }
0097 
0098 /*
0099  * Returns the cost in bits of encoding the distribution in count using ctable.
0100  * Returns an error if ctable cannot represent all the symbols in count.
0101  */
0102 size_t ZSTD_fseBitCost(
0103     FSE_CTable const* ctable,
0104     unsigned const* count,
0105     unsigned const max)
0106 {
0107     unsigned const kAccuracyLog = 8;
0108     size_t cost = 0;
0109     unsigned s;
0110     FSE_CState_t cstate;
0111     FSE_initCState(&cstate, ctable);
0112     if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
0113         DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
0114                     ZSTD_getFSEMaxSymbolValue(ctable), max);
0115         return ERROR(GENERIC);
0116     }
0117     for (s = 0; s <= max; ++s) {
0118         unsigned const tableLog = cstate.stateLog;
0119         unsigned const badCost = (tableLog + 1) << kAccuracyLog;
0120         unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
0121         if (count[s] == 0)
0122             continue;
0123         if (bitCost >= badCost) {
0124             DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
0125             return ERROR(GENERIC);
0126         }
0127         cost += (size_t)count[s] * bitCost;
0128     }
0129     return cost >> kAccuracyLog;
0130 }
0131 
0132 /*
0133  * Returns the cost in bits of encoding the distribution in count using the
0134  * table described by norm. The max symbol support by norm is assumed >= max.
0135  * norm must be valid for every symbol with non-zero probability in count.
0136  */
0137 size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
0138                              unsigned const* count, unsigned const max)
0139 {
0140     unsigned const shift = 8 - accuracyLog;
0141     size_t cost = 0;
0142     unsigned s;
0143     assert(accuracyLog <= 8);
0144     for (s = 0; s <= max; ++s) {
0145         unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
0146         unsigned const norm256 = normAcc << shift;
0147         assert(norm256 > 0);
0148         assert(norm256 < 256);
0149         cost += count[s] * kInverseProbabilityLog256[norm256];
0150     }
0151     return cost >> 8;
0152 }
0153 
0154 symbolEncodingType_e
0155 ZSTD_selectEncodingType(
0156         FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
0157         size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
0158         FSE_CTable const* prevCTable,
0159         short const* defaultNorm, U32 defaultNormLog,
0160         ZSTD_defaultPolicy_e const isDefaultAllowed,
0161         ZSTD_strategy const strategy)
0162 {
0163     ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
0164     if (mostFrequent == nbSeq) {
0165         *repeatMode = FSE_repeat_none;
0166         if (isDefaultAllowed && nbSeq <= 2) {
0167             /* Prefer set_basic over set_rle when there are 2 or less symbols,
0168              * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
0169              * If basic encoding isn't possible, always choose RLE.
0170              */
0171             DEBUGLOG(5, "Selected set_basic");
0172             return set_basic;
0173         }
0174         DEBUGLOG(5, "Selected set_rle");
0175         return set_rle;
0176     }
0177     if (strategy < ZSTD_lazy) {
0178         if (isDefaultAllowed) {
0179             size_t const staticFse_nbSeq_max = 1000;
0180             size_t const mult = 10 - strategy;
0181             size_t const baseLog = 3;
0182             size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
0183             assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
0184             assert(mult <= 9 && mult >= 7);
0185             if ( (*repeatMode == FSE_repeat_valid)
0186               && (nbSeq < staticFse_nbSeq_max) ) {
0187                 DEBUGLOG(5, "Selected set_repeat");
0188                 return set_repeat;
0189             }
0190             if ( (nbSeq < dynamicFse_nbSeq_min)
0191               || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
0192                 DEBUGLOG(5, "Selected set_basic");
0193                 /* The format allows default tables to be repeated, but it isn't useful.
0194                  * When using simple heuristics to select encoding type, we don't want
0195                  * to confuse these tables with dictionaries. When running more careful
0196                  * analysis, we don't need to waste time checking both repeating tables
0197                  * and default tables.
0198                  */
0199                 *repeatMode = FSE_repeat_none;
0200                 return set_basic;
0201             }
0202         }
0203     } else {
0204         size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
0205         size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
0206         size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
0207         size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
0208 
0209         if (isDefaultAllowed) {
0210             assert(!ZSTD_isError(basicCost));
0211             assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
0212         }
0213         assert(!ZSTD_isError(NCountCost));
0214         assert(compressedCost < ERROR(maxCode));
0215         DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
0216                     (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
0217         if (basicCost <= repeatCost && basicCost <= compressedCost) {
0218             DEBUGLOG(5, "Selected set_basic");
0219             assert(isDefaultAllowed);
0220             *repeatMode = FSE_repeat_none;
0221             return set_basic;
0222         }
0223         if (repeatCost <= compressedCost) {
0224             DEBUGLOG(5, "Selected set_repeat");
0225             assert(!ZSTD_isError(repeatCost));
0226             return set_repeat;
0227         }
0228         assert(compressedCost < basicCost && compressedCost < repeatCost);
0229     }
0230     DEBUGLOG(5, "Selected set_compressed");
0231     *repeatMode = FSE_repeat_check;
0232     return set_compressed;
0233 }
0234 
0235 typedef struct {
0236     S16 norm[MaxSeq + 1];
0237     U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
0238 } ZSTD_BuildCTableWksp;
0239 
0240 size_t
0241 ZSTD_buildCTable(void* dst, size_t dstCapacity,
0242                 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
0243                 unsigned* count, U32 max,
0244                 const BYTE* codeTable, size_t nbSeq,
0245                 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
0246                 const FSE_CTable* prevCTable, size_t prevCTableSize,
0247                 void* entropyWorkspace, size_t entropyWorkspaceSize)
0248 {
0249     BYTE* op = (BYTE*)dst;
0250     const BYTE* const oend = op + dstCapacity;
0251     DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
0252 
0253     switch (type) {
0254     case set_rle:
0255         FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
0256         RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
0257         *op = codeTable[0];
0258         return 1;
0259     case set_repeat:
0260         ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
0261         return 0;
0262     case set_basic:
0263         FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
0264         return 0;
0265     case set_compressed: {
0266         ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
0267         size_t nbSeq_1 = nbSeq;
0268         const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
0269         if (count[codeTable[nbSeq-1]] > 1) {
0270             count[codeTable[nbSeq-1]]--;
0271             nbSeq_1--;
0272         }
0273         assert(nbSeq_1 > 1);
0274         assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
0275         (void)entropyWorkspaceSize;
0276         FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
0277         {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
0278             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
0279             FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
0280             return NCountSize;
0281         }
0282     }
0283     default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
0284     }
0285 }
0286 
0287 FORCE_INLINE_TEMPLATE size_t
0288 ZSTD_encodeSequences_body(
0289             void* dst, size_t dstCapacity,
0290             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0291             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0292             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0293             seqDef const* sequences, size_t nbSeq, int longOffsets)
0294 {
0295     BIT_CStream_t blockStream;
0296     FSE_CState_t  stateMatchLength;
0297     FSE_CState_t  stateOffsetBits;
0298     FSE_CState_t  stateLitLength;
0299 
0300     RETURN_ERROR_IF(
0301         ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
0302         dstSize_tooSmall, "not enough space remaining");
0303     DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
0304                 (int)(blockStream.endPtr - blockStream.startPtr),
0305                 (unsigned)dstCapacity);
0306 
0307     /* first symbols */
0308     FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
0309     FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
0310     FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
0311     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
0312     if (MEM_32bits()) BIT_flushBits(&blockStream);
0313     BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
0314     if (MEM_32bits()) BIT_flushBits(&blockStream);
0315     if (longOffsets) {
0316         U32 const ofBits = ofCodeTable[nbSeq-1];
0317         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
0318         if (extraBits) {
0319             BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
0320             BIT_flushBits(&blockStream);
0321         }
0322         BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
0323                     ofBits - extraBits);
0324     } else {
0325         BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
0326     }
0327     BIT_flushBits(&blockStream);
0328 
0329     {   size_t n;
0330         for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
0331             BYTE const llCode = llCodeTable[n];
0332             BYTE const ofCode = ofCodeTable[n];
0333             BYTE const mlCode = mlCodeTable[n];
0334             U32  const llBits = LL_bits[llCode];
0335             U32  const ofBits = ofCode;
0336             U32  const mlBits = ML_bits[mlCode];
0337             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
0338                         (unsigned)sequences[n].litLength,
0339                         (unsigned)sequences[n].matchLength + MINMATCH,
0340                         (unsigned)sequences[n].offset);
0341                                                                             /* 32b*/  /* 64b*/
0342                                                                             /* (7)*/  /* (7)*/
0343             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
0344             FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
0345             if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
0346             FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
0347             if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
0348                 BIT_flushBits(&blockStream);                                /* (7)*/
0349             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
0350             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
0351             BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
0352             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
0353             if (longOffsets) {
0354                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
0355                 if (extraBits) {
0356                     BIT_addBits(&blockStream, sequences[n].offset, extraBits);
0357                     BIT_flushBits(&blockStream);                            /* (7)*/
0358                 }
0359                 BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
0360                             ofBits - extraBits);                            /* 31 */
0361             } else {
0362                 BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
0363             }
0364             BIT_flushBits(&blockStream);                                    /* (7)*/
0365             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
0366     }   }
0367 
0368     DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
0369     FSE_flushCState(&blockStream, &stateMatchLength);
0370     DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
0371     FSE_flushCState(&blockStream, &stateOffsetBits);
0372     DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
0373     FSE_flushCState(&blockStream, &stateLitLength);
0374 
0375     {   size_t const streamSize = BIT_closeCStream(&blockStream);
0376         RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
0377         return streamSize;
0378     }
0379 }
0380 
0381 static size_t
0382 ZSTD_encodeSequences_default(
0383             void* dst, size_t dstCapacity,
0384             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0385             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0386             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0387             seqDef const* sequences, size_t nbSeq, int longOffsets)
0388 {
0389     return ZSTD_encodeSequences_body(dst, dstCapacity,
0390                                     CTable_MatchLength, mlCodeTable,
0391                                     CTable_OffsetBits, ofCodeTable,
0392                                     CTable_LitLength, llCodeTable,
0393                                     sequences, nbSeq, longOffsets);
0394 }
0395 
0396 
0397 #if DYNAMIC_BMI2
0398 
0399 static TARGET_ATTRIBUTE("bmi2") size_t
0400 ZSTD_encodeSequences_bmi2(
0401             void* dst, size_t dstCapacity,
0402             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0403             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0404             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0405             seqDef const* sequences, size_t nbSeq, int longOffsets)
0406 {
0407     return ZSTD_encodeSequences_body(dst, dstCapacity,
0408                                     CTable_MatchLength, mlCodeTable,
0409                                     CTable_OffsetBits, ofCodeTable,
0410                                     CTable_LitLength, llCodeTable,
0411                                     sequences, nbSeq, longOffsets);
0412 }
0413 
0414 #endif
0415 
0416 size_t ZSTD_encodeSequences(
0417             void* dst, size_t dstCapacity,
0418             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0419             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0420             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0421             seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
0422 {
0423     DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
0424 #if DYNAMIC_BMI2
0425     if (bmi2) {
0426         return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
0427                                          CTable_MatchLength, mlCodeTable,
0428                                          CTable_OffsetBits, ofCodeTable,
0429                                          CTable_LitLength, llCodeTable,
0430                                          sequences, nbSeq, longOffsets);
0431     }
0432 #endif
0433     (void)bmi2;
0434     return ZSTD_encodeSequences_default(dst, dstCapacity,
0435                                         CTable_MatchLength, mlCodeTable,
0436                                         CTable_OffsetBits, ofCodeTable,
0437                                         CTable_LitLength, llCodeTable,
0438                                         sequences, nbSeq, longOffsets);
0439 }