0001 /* ******************************************************************
0002  * Huffman encoder, part of New Generation Entropy library
0003  * Copyright (c) Yann Collet, Facebook, Inc.
0004  *
0005  *  You can contact the author at :
0006  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
0007  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
0008  *
0009  * This source code is licensed under both the BSD-style license (found in the
0010  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
0011  * in the COPYING file in the root directory of this source tree).
0012  * You may select, at your option, one of the above-listed licenses.
0013 ****************************************************************** */
0014 
0015 /* **************************************************************
0016 *  Compiler specifics
0017 ****************************************************************/
0018 
0019 
0020 /* **************************************************************
0021 *  Includes
0022 ****************************************************************/
0023 #include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
0024 #include "../common/compiler.h"
0025 #include "../common/bitstream.h"
0026 #include "hist.h"
0027 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
0028 #include "../common/fse.h"        /* header compression */
0029 #define HUF_STATIC_LINKING_ONLY
0030 #include "../common/huf.h"
0031 #include "../common/error_private.h"
0032 
0033 
0034 /* **************************************************************
0035 *  Error Management
0036 ****************************************************************/
0037 #define HUF_isError ERR_isError
0038 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
0039 
0040 
0041 /* **************************************************************
0042 *  Utils
0043 ****************************************************************/
0044 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
0045 {
0046     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
0047 }
0048 
0049 
0050 /* *******************************************************
0051 *  HUF : Huffman block compression
0052 *********************************************************/
0053 /* HUF_compressWeights() :
0054  * Same as FSE_compress(), but dedicated to huff0's weights compression.
0055  * The use case needs much less stack memory.
0056  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
0057  */
0058 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
0059 
0060 typedef struct {
0061     FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
0062     U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
0063     unsigned count[HUF_TABLELOG_MAX+1];
0064     S16 norm[HUF_TABLELOG_MAX+1];
0065 } HUF_CompressWeightsWksp;
0066 
0067 static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
0068 {
0069     BYTE* const ostart = (BYTE*) dst;
0070     BYTE* op = ostart;
0071     BYTE* const oend = ostart + dstSize;
0072 
0073     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
0074     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
0075     HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
0076 
0077     if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
0078 
0079     /* init conditions */
0080     if (wtSize <= 1) return 0;  /* Not compressible */
0081 
0082     /* Scan input and build symbol stats */
0083     {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
0084         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
0085         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
0086     }
0087 
0088     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
0089     CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
0090 
0091     /* Write table description header */
0092     {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
0093         op += hSize;
0094     }
0095 
0096     /* Compress */
0097     CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
0098     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
0099         if (cSize == 0) return 0;   /* not enough space for compressed data */
0100         op += cSize;
0101     }
0102 
0103     return (size_t)(op-ostart);
0104 }
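
/* Illustrative caller sketch for HUF_compressWeights() (hypothetical buffer and
 * variable names, not part of the library). Note that the small return values
 * are signals rather than errors : 0 means "weights not compressible, store them
 * raw" and 1 means "a single repeated weight (RLE)".
 *
 *     HUF_CompressWeightsWksp wksp;                 // caller-provided scratch
 *     BYTE hdr[HUF_SYMBOLVALUE_MAX];                // destination for compressed weights
 *     size_t const hSize = HUF_compressWeights(hdr, sizeof(hdr),
 *                                              huffWeight, nbWeights,  // one weight per symbol, each <= HUF_TABLELOG_MAX
 *                                              &wksp, sizeof(wksp));
 *     if (HUF_isError(hSize)) { ... }               // e.g. workspace too small
 *     else if (hSize <= 1)    { ... }               // fall back to the raw 4-bit encoding
 */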
0105 
0106 
0107 typedef struct {
0108     HUF_CompressWeightsWksp wksp;
0109     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
0110     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
0111 } HUF_WriteCTableWksp;
0112 
0113 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
0114                             const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
0115                             void* workspace, size_t workspaceSize)
0116 {
0117     BYTE* op = (BYTE*)dst;
0118     U32 n;
0119     HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
0120 
0121     /* check conditions */
0122     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
0123     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
0124 
0125     /* convert to weight */
0126     wksp->bitsToWeight[0] = 0;
0127     for (n=1; n<huffLog+1; n++)
0128         wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
0129     for (n=0; n<maxSymbolValue; n++)
0130         wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
0131 
0132     /* attempt weights compression by FSE */
0133     {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
0134         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
0135             op[0] = (BYTE)hSize;
0136             return hSize+1;
0137     }   }
0138 
0139     /* write raw values as 4-bits (max : 15) */
0140     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
0141     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
0142     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
0143     wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
0144     for (n=0; n<maxSymbolValue; n+=2)
0145         op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
0146     return ((maxSymbolValue+1)/2) + 1;
0147 }
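
/* Worked example of the raw 4-bit fallback above (illustrative values) :
 * with maxSymbolValue = 3 and weights {2,1,1} for symbols 0..2 (the weight of
 * the last symbol is never stored; the decoder reconstructs it from the weight
 * sum, here inferring weight 3 for symbol 3), the header becomes :
 *
 *     op[0] = 128 + (3-1) = 130          // ">=128" flags the raw encoding and carries the count
 *     op[1] = (2<<4) + 1  = 0x21         // weights of symbols 0 and 1
 *     op[2] = (1<<4) + 0  = 0x10         // weight of symbol 2, padded with the zeroed slot
 *     total size = ((3+1)/2) + 1 = 3 bytes
 */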
0148 
0149 /*! HUF_writeCTable() :
0150     `CTable` : Huffman tree to save, using huf representation.
0151     @return : size of saved CTable */
0152 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
0153                         const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
0154 {
0155     HUF_WriteCTableWksp wksp;
0156     return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
0157 }
0158 
0159 
0160 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
0161 {
0162     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
0163     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
0164     U32 tableLog = 0;
0165     U32 nbSymbols = 0;
0166 
0167     /* get symbol weights */
0168     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
0169     *hasZeroWeights = (rankVal[0] > 0);
0170 
0171     /* check result */
0172     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
0173     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
0174 
0175     /* Prepare base value per rank */
0176     {   U32 n, nextRankStart = 0;
0177         for (n=1; n<=tableLog; n++) {
0178             U32 curr = nextRankStart;
0179             nextRankStart += (rankVal[n] << (n-1));
0180             rankVal[n] = curr;
0181     }   }
0182 
0183     /* fill nbBits */
0184     {   U32 n; for (n=0; n<nbSymbols; n++) {
0185             const U32 w = huffWeight[n];
0186             CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
0187     }   }
0188 
0189     /* fill val */
0190     {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
0191         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
0192         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
0193         /* determine starting value per rank */
0194         valPerRank[tableLog+1] = 0;   /* for w==0 */
0195         {   U16 min = 0;
0196             U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
0197                 valPerRank[n] = min;     /* get starting value within each rank */
0198                 min += nbPerRank[n];
0199                 min >>= 1;
0200         }   }
0201         /* assign value within rank, symbol order */
0202         { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
0203     }
0204 
0205     *maxSymbolValuePtr = nbSymbols - 1;
0206     return readSize;
0207 }
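
/* Worked example of the canonical code assignment above (illustrative values) :
 * tableLog = 2, three symbols with weights {2,1,1}.
 *   nbBits = tableLog + 1 - w            -> {1,2,2}
 *   nbPerRank[1..2]                      -> {1,2}
 *   valPerRank (scanned from n=tableLog down to 1) :
 *       n=2 : valPerRank[2]=0, min=(0+2)>>1=1
 *       n=1 : valPerRank[1]=1
 *   values assigned in symbol order      -> sym0: val=1 (code '1'),
 *                                           sym1: val=0 (code '00'),
 *                                           sym2: val=1 (code '01')
 * i.e. a prefix-free canonical code where shorter codes occupy the upper part
 * of the code space.
 */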
0208 
0209 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
0210 {
0211     const HUF_CElt* table = (const HUF_CElt*)symbolTable;
0212     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
0213     return table[symbolValue].nbBits;
0214 }
0215 
0216 
0217 typedef struct nodeElt_s {
0218     U32 count;
0219     U16 parent;
0220     BYTE byte;
0221     BYTE nbBits;
0222 } nodeElt;
0223 
0224 /*
0225  * HUF_setMaxHeight():
0226  * Enforces maxNbBits on the Huffman tree described in huffNode.
0227  *
0228  * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
0229  * the tree so that it is a valid canonical Huffman tree.
0230  *
0231  * @pre               The sum of the ranks of each symbol == 2^largestBits,
0232  *                    where largestBits == huffNode[lastNonNull].nbBits.
0233  * @post              The sum of the ranks of each symbol == 2^largestBits,
0234  *                    where largestBits is the return value <= maxNbBits.
0235  *
0236  * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
0237  * @param lastNonNull The symbol with the lowest count in the Huffman tree.
0238  * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
0239  *                    may not respect. After this function the Huffman tree will
0240  *                    respect maxNbBits.
0241  * @return            The maximum number of bits of the Huffman tree after adjustment,
0242  *                    necessarily no more than maxNbBits.
0243  */
0244 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
0245 {
0246     const U32 largestBits = huffNode[lastNonNull].nbBits;
0247     /* early exit : no elt > maxNbBits, so the tree is already valid. */
0248     if (largestBits <= maxNbBits) return largestBits;
0249 
0250     /* there are several too-large elements (at least 2) */
0251     {   int totalCost = 0;
0252         const U32 baseCost = 1 << (largestBits - maxNbBits);
0253         int n = (int)lastNonNull;
0254 
0255         /* Adjust any ranks > maxNbBits to maxNbBits.
0256          * Compute totalCost, which is how far the sum of the ranks
0257          * ends up over 2^largestBits after adjusting the offending ranks.
0258          */
0259         while (huffNode[n].nbBits > maxNbBits) {
0260             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
0261             huffNode[n].nbBits = (BYTE)maxNbBits;
0262             n--;
0263         }
0264         /* n stops at huffNode[n].nbBits <= maxNbBits */
0265         assert(huffNode[n].nbBits <= maxNbBits);
0266         /* n ends at the index of the smallest symbol using < maxNbBits */
0267         while (huffNode[n].nbBits == maxNbBits) --n;
0268 
0269         /* renorm totalCost from 2^largestBits to 2^maxNbBits
0270          * note : totalCost is necessarily a multiple of baseCost */
0271         assert((totalCost & (baseCost - 1)) == 0);
0272         totalCost >>= (largestBits - maxNbBits);
0273         assert(totalCost > 0);
0274 
0275         /* repay normalized cost */
0276         {   U32 const noSymbol = 0xF0F0F0F0;
0277             U32 rankLast[HUF_TABLELOG_MAX+2];
0278 
0279             /* Get pos of last (smallest = lowest cum. count) symbol per rank */
0280             ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
0281             {   U32 currentNbBits = maxNbBits;
0282                 int pos;
0283                 for (pos=n ; pos >= 0; pos--) {
0284                     if (huffNode[pos].nbBits >= currentNbBits) continue;
0285                     currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
0286                     rankLast[maxNbBits-currentNbBits] = (U32)pos;
0287             }   }
0288 
0289             while (totalCost > 0) {
0290                 /* Try to reduce the next power of 2 above totalCost because we
0291                  * gain back half the rank.
0292                  */
0293                 U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
0294                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
0295                     U32 const highPos = rankLast[nBitsToDecrease];
0296                     U32 const lowPos = rankLast[nBitsToDecrease-1];
0297                     if (highPos == noSymbol) continue;
0298                     /* Break here and demote highPos if there are no lowPos symbols,
0299                      * or if removing 2 lowPos symbols is not cheaper than removing highPos.
0300                      */
0301                     if (lowPos == noSymbol) break;
0302                     {   U32 const highTotal = huffNode[highPos].count;
0303                         U32 const lowTotal = 2 * huffNode[lowPos].count;
0304                         if (highTotal <= lowTotal) break;
0305                 }   }
0306                 /* only triggered when no rank-1 symbol is left => search upward for the closest non-empty rank (note : there is necessarily at least one !) */
0307                 assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
0308                 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
0309                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
0310                     nBitsToDecrease++;
0311                 assert(rankLast[nBitsToDecrease] != noSymbol);
0312                 /* Increase the number of bits to gain back half the rank cost. */
0313                 totalCost -= 1 << (nBitsToDecrease-1);
0314                 huffNode[rankLast[nBitsToDecrease]].nbBits++;
0315 
0316                 /* Fix up the new rank.
0317                  * If the new rank was empty, this symbol is now its smallest.
0318                  * Otherwise, this symbol will be the largest in the new rank so no adjustment.
0319                  */
0320                 if (rankLast[nBitsToDecrease-1] == noSymbol)
0321                     rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
0322                 /* Fix up the old rank.
0323                  * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
0324                  * it must be the only symbol in its rank, so the old rank now has no symbols.
0325                  * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
0326                  * the smallest node in the rank. If the previous position belongs to a different rank,
0327                  * then the rank is now empty.
0328                  */
0329                 if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
0330                     rankLast[nBitsToDecrease] = noSymbol;
0331                 else {
0332                     rankLast[nBitsToDecrease]--;
0333                     if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
0334                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
0335                 }
0336             }   /* while (totalCost > 0) */
0337 
0338             /* If we've removed too much weight, then we have to add it back.
0339              * To avoid overshooting again, we only adjust the smallest rank.
0340              * We take the largest nodes from the lowest rank 0 and move them
0341              * to rank 1. There's guaranteed to be enough rank 0 symbols because
0342              * TODO.
0343              */
0344             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
0345                 /* special case : no rank 1 symbol (using maxNbBits-1);
0346                  * let's create one from largest rank 0 (using maxNbBits).
0347                  */
0348                 if (rankLast[1] == noSymbol) {
0349                     while (huffNode[n].nbBits == maxNbBits) n--;
0350                     huffNode[n+1].nbBits--;
0351                     assert(n >= 0);
0352                     rankLast[1] = (U32)(n+1);
0353                     totalCost++;
0354                     continue;
0355                 }
0356                 huffNode[ rankLast[1] + 1 ].nbBits--;
0357                 rankLast[1]++;
0358                 totalCost ++;
0359             }
0360         }   /* repay normalized cost */
0361     }   /* there are several too-large elements (at least 2) */
0362 
0363     return maxNbBits;
0364 }
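
/* Worked example of the cost accounting in HUF_setMaxHeight() (illustrative
 * numbers). Each leaf carries a "rank weight" of 2^(largestBits - nbBits), and a
 * valid tree sums to exactly 2^largestBits. Say largestBits = 13 and
 * maxNbBits = 11, so baseCost = 1 << (13-11) = 4 :
 *   - demoting a 13-bit leaf to 11 bits adds 4 - 1 = 3 to totalCost
 *   - demoting a 12-bit leaf to 11 bits adds 4 - 2 = 2 to totalCost
 * If two 13-bit leaves and one 12-bit leaf are demoted, totalCost = 3+3+2 = 8,
 * a multiple of baseCost as asserted. After totalCost >>= 2 it becomes 2, now
 * expressed relative to 2^maxNbBits : it can be repaid, e.g., by pushing one
 * 9-bit symbol to 10 bits (repays 1 << (2-1) = 2), or two 10-bit symbols to
 * 11 bits (each repays 1 << (1-1) = 1).
 */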
0365 
0366 typedef struct {
0367     U32 base;
0368     U32 curr;
0369 } rankPos;
0370 
0371 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
0372 
0373 #define RANK_POSITION_TABLE_SIZE 32
0374 
0375 typedef struct {
0376   huffNodeTable huffNodeTbl;
0377   rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
0378 } HUF_buildCTable_wksp_tables;
0379 
0380 /*
0381  * HUF_sort():
0382  * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
0383  *
0384  * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
0385  *                            Must have (maxSymbolValue + 1) entries.
0386  * @param[in]  count          Histogram of the symbols.
0387  * @param[in]  maxSymbolValue Maximum symbol value.
0388  * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
0389  */
0390 static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
0391 {
0392     int n;
0393     int const maxSymbolValue1 = (int)maxSymbolValue + 1;
0394 
0395     /* Compute base and set curr to base.
0396      * For each symbol n, let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
0397      * Then 2^lowerRank <= count[n]+1 <= 2^rank.
0398      * We attribute each symbol to lowerRank's base value, because we want to know where
0399      * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
0400      */
0401     ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
0402     for (n = 0; n < maxSymbolValue1; ++n) {
0403         U32 lowerRank = BIT_highbit32(count[n] + 1);
0404         rankPosition[lowerRank].base++;
0405     }
0406     assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
0407     for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
0408         rankPosition[n-1].base += rankPosition[n].base;
0409         rankPosition[n-1].curr = rankPosition[n-1].base;
0410     }
0411     /* Sort */
0412     for (n = 0; n < maxSymbolValue1; ++n) {
0413         U32 const c = count[n];
0414         U32 const r = BIT_highbit32(c+1) + 1;
0415         U32 pos = rankPosition[r].curr++;
0416         /* Insert into the correct position in the rank.
0417          * We have at most 256 symbols, so this insertion should be fine.
0418          */
0419         while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
0420             huffNode[pos] = huffNode[pos-1];
0421             pos--;
0422         }
0423         huffNode[pos].count = c;
0424         huffNode[pos].byte  = (BYTE)n;
0425     }
0426 }
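
/* Worked example of the bucketing above (illustrative counts). For symbols
 * 0..3 with count = {3, 0, 7, 1} :
 *   count+1   = {4, 1, 8, 2}
 *   lowerRank = {2, 0, 3, 1}             // BIT_highbit32(count+1)
 * After the suffix accumulation, base[r] = number of symbols whose lowerRank
 * is >= r, i.e. base = {4, 3, 2, 1, 0, ...}, so each rank gets the slice of
 * output positions that starts right after all strictly larger count buckets.
 * The insertion pass then yields huffNode counts {7, 3, 1, 0} : decreasing
 * order, with ties resolved inside their rank bucket.
 */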
0427 
0428 
0429 /* HUF_buildCTable_wksp() :
0430  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
0431  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
0432  */
0433 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
0434 
0435 /* HUF_buildTree():
0436  * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
0437  *
0438  * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
0439  * @param maxSymbolValue  The maximum symbol value.
0440  * @return                The index of the smallest-count leaf in the Huffman tree (nonNullRank).
0441  */
0442 static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
0443 {
0444     nodeElt* const huffNode0 = huffNode - 1;
0445     int nonNullRank;
0446     int lowS, lowN;
0447     int nodeNb = STARTNODE;
0448     int n, nodeRoot;
0449     /* init for parents */
0450     nonNullRank = (int)maxSymbolValue;
0451     while(huffNode[nonNullRank].count == 0) nonNullRank--;
0452     lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
0453     huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
0454     huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
0455     nodeNb++; lowS-=2;
0456     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
0457     huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
0458 
0459     /* create parents */
0460     while (nodeNb <= nodeRoot) {
0461         int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
0462         int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
0463         huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
0464         huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
0465         nodeNb++;
0466     }
0467 
0468     /* distribute weights (unlimited tree height) */
0469     huffNode[nodeRoot].nbBits = 0;
0470     for (n=nodeRoot-1; n>=STARTNODE; n--)
0471         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
0472     for (n=0; n<=nonNullRank; n++)
0473         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
0474 
0475     return nonNullRank;
0476 }
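
/* Small illustration of the merge loop above (illustrative counts). With sorted
 * leaf counts {5, 3, 2, 1} (nonNullRank = 3) :
 *   1st parent : 1 + 2 = 3   (the two smallest leaves)
 *   2nd parent : 3 + 3 = 6   (leaf "3" and the node just created)
 *   3rd parent : 5 + 6 = 11  (root)
 * Walking parents back down gives depths {1, 2, 3, 3}, i.e. nbBits before any
 * maxNbBits limitation is applied by HUF_setMaxHeight().
 */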
0477 
0478 /*
0479  * HUF_buildCTableFromTree():
0480  * Build the CTable given the Huffman tree in huffNode.
0481  *
0482  * @param[out] CTable         The output Huffman CTable.
0483  * @param      huffNode       The Huffman tree.
0484  * @param      nonNullRank    The last and smallest node in the Huffman tree.
0485  * @param      maxSymbolValue The maximum symbol value.
0486  * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
0487  */
0488 static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
0489 {
0490     /* fill result into ctable (val, nbBits) */
0491     int n;
0492     U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
0493     U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
0494     int const alphabetSize = (int)(maxSymbolValue + 1);
0495     for (n=0; n<=nonNullRank; n++)
0496         nbPerRank[huffNode[n].nbBits]++;
0497     /* determine starting value per rank */
0498     {   U16 min = 0;
0499         for (n=(int)maxNbBits; n>0; n--) {
0500             valPerRank[n] = min;      /* get starting value within each rank */
0501             min += nbPerRank[n];
0502             min >>= 1;
0503     }   }
0504     for (n=0; n<alphabetSize; n++)
0505         CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
0506     for (n=0; n<alphabetSize; n++)
0507         CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
0508 }
0509 
0510 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
0511 {
0512     HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
0513     nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
0514     nodeElt* const huffNode = huffNode0+1;
0515     int nonNullRank;
0516 
0517     /* safety checks */
0518     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
0519     if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
0520       return ERROR(workSpace_tooSmall);
0521     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
0522     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
0523       return ERROR(maxSymbolValue_tooLarge);
0524     ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
0525 
0526     /* sort, decreasing order */
0527     HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
0528 
0529     /* build tree */
0530     nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
0531 
0532     /* enforce maxTableLog */
0533     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
0534     if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
0535 
0536     HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
0537 
0538     return maxNbBits;
0539 }
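
/* Illustrative usage sketch (hypothetical caller code) : building a reusable
 * CTable from a histogram, with a 4-byte-aligned workspace of at least
 * sizeof(HUF_buildCTable_wksp_tables) :
 *
 *     unsigned count[HUF_SYMBOLVALUE_MAX+1];
 *     unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
 *     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
 *     HUF_buildCTable_wksp_tables wksp;                           // naturally 4-byte aligned
 *     HIST_count_simple(count, &maxSymbolValue, src, srcSize);    // src/srcSize : hypothetical input
 *     {   size_t const maxNbBits = HUF_buildCTable_wksp(CTable, count, maxSymbolValue,
 *                                       HUF_TABLELOG_DEFAULT, &wksp, sizeof(wksp));
 *         if (HUF_isError(maxNbBits)) { ... }
 *         // CTable is now ready for HUF_writeCTable() and HUF_compress1X/4X_usingCTable()
 *     }
 */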
0540 
0541 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
0542 {
0543     size_t nbBits = 0;
0544     int s;
0545     for (s = 0; s <= (int)maxSymbolValue; ++s) {
0546         nbBits += CTable[s].nbBits * count[s];
0547     }
0548     return nbBits >> 3;
0549 }
0550 
0551 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
0552   int bad = 0;
0553   int s;
0554   for (s = 0; s <= (int)maxSymbolValue; ++s) {
0555     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
0556   }
0557   return !bad;
0558 }
0559 
0560 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
0561 
0562 FORCE_INLINE_TEMPLATE void
0563 HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
0564 {
0565     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
0566 }
0567 
0568 #define HUF_FLUSHBITS(s)  BIT_flushBits(s)
0569 
0570 #define HUF_FLUSHBITS_1(stream) \
0571     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
0572 
0573 #define HUF_FLUSHBITS_2(stream) \
0574     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
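
/* Why these staggered flushes are sufficient : a symbol never uses more than
 * HUF_TABLELOG_MAX (currently 12) bits, and BIT_flushBits() may leave up to
 * 7 bits behind in the container, hence the "+7" slack in the tests. So :
 *   - 64-bit bitContainer : 4 symbols need at most 4*12+7 = 55 <= 64 bits,
 *     HUF_FLUSHBITS_1 and HUF_FLUSHBITS_2 compile to nothing, and only the
 *     unconditional HUF_FLUSHBITS after each group of 4 symbols runs.
 *   - 32-bit bitContainer : 2 symbols need at most 2*12+7 = 31 <= 32 bits, so
 *     HUF_FLUSHBITS_1 is still a no-op, but HUF_FLUSHBITS_2 flushes because
 *     4 symbols could need 55 > 32 bits.
 */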
0575 
0576 FORCE_INLINE_TEMPLATE size_t
0577 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
0578                                    const void* src, size_t srcSize,
0579                                    const HUF_CElt* CTable)
0580 {
0581     const BYTE* ip = (const BYTE*) src;
0582     BYTE* const ostart = (BYTE*)dst;
0583     BYTE* const oend = ostart + dstSize;
0584     BYTE* op = ostart;
0585     size_t n;
0586     BIT_CStream_t bitC;
0587 
0588     /* init */
0589     if (dstSize < 8) return 0;   /* not enough space to compress */
0590     { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
0591       if (HUF_isError(initErr)) return 0; }
0592 
0593     n = srcSize & ~3;  /* round down to the nearest multiple of 4 */
0594     switch (srcSize & 3)
0595     {
0596         case 3:
0597             HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
0598             HUF_FLUSHBITS_2(&bitC);
0599             ZSTD_FALLTHROUGH;
0600         case 2:
0601             HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
0602             HUF_FLUSHBITS_1(&bitC);
0603             ZSTD_FALLTHROUGH;
0604         case 1:
0605             HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
0606             HUF_FLUSHBITS(&bitC);
0607             ZSTD_FALLTHROUGH;
0608         case 0: ZSTD_FALLTHROUGH;
0609         default: break;
0610     }
0611 
0612     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
0613         HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
0614         HUF_FLUSHBITS_1(&bitC);
0615         HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
0616         HUF_FLUSHBITS_2(&bitC);
0617         HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
0618         HUF_FLUSHBITS_1(&bitC);
0619         HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
0620         HUF_FLUSHBITS(&bitC);
0621     }
0622 
0623     return BIT_closeCStream(&bitC);
0624 }
0625 
0626 #if DYNAMIC_BMI2
0627 
0628 static TARGET_ATTRIBUTE("bmi2") size_t
0629 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
0630                                    const void* src, size_t srcSize,
0631                                    const HUF_CElt* CTable)
0632 {
0633     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
0634 }
0635 
0636 static size_t
0637 HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
0638                                       const void* src, size_t srcSize,
0639                                       const HUF_CElt* CTable)
0640 {
0641     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
0642 }
0643 
0644 static size_t
0645 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
0646                               const void* src, size_t srcSize,
0647                               const HUF_CElt* CTable, const int bmi2)
0648 {
0649     if (bmi2) {
0650         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
0651     }
0652     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
0653 }
0654 
0655 #else
0656 
0657 static size_t
0658 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
0659                               const void* src, size_t srcSize,
0660                               const HUF_CElt* CTable, const int bmi2)
0661 {
0662     (void)bmi2;
0663     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
0664 }
0665 
0666 #endif
0667 
0668 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
0669 {
0670     return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
0671 }
0672 
0673 
0674 static size_t
0675 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
0676                               const void* src, size_t srcSize,
0677                               const HUF_CElt* CTable, int bmi2)
0678 {
0679     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
0680     const BYTE* ip = (const BYTE*) src;
0681     const BYTE* const iend = ip + srcSize;
0682     BYTE* const ostart = (BYTE*) dst;
0683     BYTE* const oend = ostart + dstSize;
0684     BYTE* op = ostart;
0685 
0686     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
0687     if (srcSize < 12) return 0;   /* no saving possible : too small input */
0688     op += 6;   /* jumpTable */
0689 
0690     assert(op <= oend);
0691     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
0692         if (cSize==0) return 0;
0693         assert(cSize <= 65535);
0694         MEM_writeLE16(ostart, (U16)cSize);
0695         op += cSize;
0696     }
0697 
0698     ip += segmentSize;
0699     assert(op <= oend);
0700     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
0701         if (cSize==0) return 0;
0702         assert(cSize <= 65535);
0703         MEM_writeLE16(ostart+2, (U16)cSize);
0704         op += cSize;
0705     }
0706 
0707     ip += segmentSize;
0708     assert(op <= oend);
0709     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
0710         if (cSize==0) return 0;
0711         assert(cSize <= 65535);
0712         MEM_writeLE16(ostart+4, (U16)cSize);
0713         op += cSize;
0714     }
0715 
0716     ip += segmentSize;
0717     assert(op <= oend);
0718     assert(ip <= iend);
0719     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
0720         if (cSize==0) return 0;
0721         op += cSize;
0722     }
0723 
0724     return (size_t)(op-ostart);
0725 }
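
/* Resulting 4-stream block layout (as produced above) :
 *
 *     +-----------+---------+---------+---------+---------+
 *     | jumpTable | stream1 | stream2 | stream3 | stream4 |
 *     |  6 bytes  | cSize1  | cSize2  | cSize3  |  rest   |
 *     +-----------+---------+---------+---------+---------+
 *
 * The jump table holds three little-endian 16-bit values : the compressed sizes
 * of streams 1-3 (hence the cSize <= 65535 asserts). The size of stream 4 is
 * implied by the total block size. Streams 1-3 each encode
 * segmentSize = (srcSize+3)/4 input bytes; stream 4 encodes the remaining
 * iend - ip bytes.
 */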
0726 
0727 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
0728 {
0729     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
0730 }
0731 
0732 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
0733 
0734 static size_t HUF_compressCTable_internal(
0735                 BYTE* const ostart, BYTE* op, BYTE* const oend,
0736                 const void* src, size_t srcSize,
0737                 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
0738 {
0739     size_t const cSize = (nbStreams==HUF_singleStream) ?
0740                          HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
0741                          HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
0742     if (HUF_isError(cSize)) { return cSize; }
0743     if (cSize==0) { return 0; }   /* uncompressible */
0744     op += cSize;
0745     /* check compressibility */
0746     assert(op >= ostart);
0747     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
0748     return (size_t)(op-ostart);
0749 }
0750 
0751 typedef struct {
0752     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
0753     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
0754     union {
0755         HUF_buildCTable_wksp_tables buildCTable_wksp;
0756         HUF_WriteCTableWksp writeCTable_wksp;
0757     } wksps;
0758 } HUF_compress_tables_t;
0759 
0760 /* HUF_compress_internal() :
0761  * `workSpace_align4` must be aligned on 4-bytes boundaries,
0762  * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
0763 static size_t
0764 HUF_compress_internal (void* dst, size_t dstSize,
0765                  const void* src, size_t srcSize,
0766                        unsigned maxSymbolValue, unsigned huffLog,
0767                        HUF_nbStreams_e nbStreams,
0768                        void* workSpace_align4, size_t wkspSize,
0769                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
0770                  const int bmi2)
0771 {
0772     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
0773     BYTE* const ostart = (BYTE*)dst;
0774     BYTE* const oend = ostart + dstSize;
0775     BYTE* op = ostart;
0776 
0777     HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
0778     assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
0779 
0780     /* checks & inits */
0781     if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
0782     if (!srcSize) return 0;  /* Uncompressed */
0783     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
0784     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
0785     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
0786     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
0787     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
0788     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
0789 
0790     /* Heuristic : If old table is valid, use it for small inputs */
0791     if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
0792         return HUF_compressCTable_internal(ostart, op, oend,
0793                                            src, srcSize,
0794                                            nbStreams, oldHufTable, bmi2);
0795     }
0796 
0797     /* Scan input and build symbol stats */
0798     {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
0799         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
0800         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
0801     }
0802 
0803     /* Check validity of previous table */
0804     if ( repeat
0805       && *repeat == HUF_repeat_check
0806       && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
0807         *repeat = HUF_repeat_none;
0808     }
0809     /* Heuristic : use existing table for small inputs */
0810     if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
0811         return HUF_compressCTable_internal(ostart, op, oend,
0812                                            src, srcSize,
0813                                            nbStreams, oldHufTable, bmi2);
0814     }
0815 
0816     /* Build Huffman Tree */
0817     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
0818     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
0819                                             maxSymbolValue, huffLog,
0820                                             &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
0821         CHECK_F(maxBits);
0822         huffLog = (U32)maxBits;
0823         /* Zero unused symbols in CTable, so we can check it for validity */
0824         ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
0825                sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
0826     }
0827 
0828     /* Write table description header */
0829     {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
0830                                               &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
0831         /* Check if using previous huffman table is beneficial */
0832         if (repeat && *repeat != HUF_repeat_none) {
0833             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
0834             size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
0835             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
0836                 return HUF_compressCTable_internal(ostart, op, oend,
0837                                                    src, srcSize,
0838                                                    nbStreams, oldHufTable, bmi2);
0839         }   }
0840 
0841         /* Use the new huffman table */
0842         if (hSize + 12ul >= srcSize) { return 0; }
0843         op += hSize;
0844         if (repeat) { *repeat = HUF_repeat_none; }
0845         if (oldHufTable)
0846             ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
0847     }
0848     return HUF_compressCTable_internal(ostart, op, oend,
0849                                        src, srcSize,
0850                                        nbStreams, table->CTable, bmi2);
0851 }
0852 
0853 
0854 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
0855                       const void* src, size_t srcSize,
0856                       unsigned maxSymbolValue, unsigned huffLog,
0857                       void* workSpace, size_t wkspSize)
0858 {
0859     return HUF_compress_internal(dst, dstSize, src, srcSize,
0860                                  maxSymbolValue, huffLog, HUF_singleStream,
0861                                  workSpace, wkspSize,
0862                                  NULL, NULL, 0, 0 /*bmi2*/);
0863 }
0864 
0865 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
0866                       const void* src, size_t srcSize,
0867                       unsigned maxSymbolValue, unsigned huffLog,
0868                       void* workSpace, size_t wkspSize,
0869                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
0870 {
0871     return HUF_compress_internal(dst, dstSize, src, srcSize,
0872                                  maxSymbolValue, huffLog, HUF_singleStream,
0873                                  workSpace, wkspSize, hufTable,
0874                                  repeat, preferRepeat, bmi2);
0875 }
0876 
0877 /* HUF_compress4X_wksp():
0878  * compress input using 4 streams.
0879  * provide workspace to generate compression tables */
0880 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
0881                       const void* src, size_t srcSize,
0882                       unsigned maxSymbolValue, unsigned huffLog,
0883                       void* workSpace, size_t wkspSize)
0884 {
0885     return HUF_compress_internal(dst, dstSize, src, srcSize,
0886                                  maxSymbolValue, huffLog, HUF_fourStreams,
0887                                  workSpace, wkspSize,
0888                                  NULL, NULL, 0, 0 /*bmi2*/);
0889 }
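
/* Illustrative one-shot usage (hypothetical caller code and sizes), assuming a
 * workspace of at least HUF_WORKSPACE_SIZE bytes aligned on 4 bytes :
 *
 *     unsigned wksp[HUF_WORKSPACE_SIZE_U32];        // 4-byte aligned by type
 *     BYTE dst[HUF_COMPRESSBOUND(SRC_SIZE)];        // SRC_SIZE <= HUF_BLOCKSIZE_MAX
 *     size_t const cSize = HUF_compress4X_wksp(dst, sizeof(dst), src, SRC_SIZE,
 *                                              255, 11,            // maxSymbolValue, huffLog
 *                                              wksp, sizeof(wksp));
 *     if (HUF_isError(cSize)) { ... }   // real error (tableLog too large, workspace too small, ...)
 *     else if (cSize == 0)    { ... }   // not compressible : caller stores src uncompressed
 *     else if (cSize == 1)    { ... }   // RLE : src is a single repeated byte
 */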
0890 
0891 /* HUF_compress4X_repeat():
0892  * compress input using 4 streams.
0893  * re-use an existing huffman compression table */
0894 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
0895                       const void* src, size_t srcSize,
0896                       unsigned maxSymbolValue, unsigned huffLog,
0897                       void* workSpace, size_t wkspSize,
0898                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
0899 {
0900     return HUF_compress_internal(dst, dstSize, src, srcSize,
0901                                  maxSymbolValue, huffLog, HUF_fourStreams,
0902                                  workSpace, wkspSize,
0903                                  hufTable, repeat, preferRepeat, bmi2);
0904 }
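
/* Illustrative sketch of table reuse across blocks (hypothetical caller code).
 * The caller owns the previous CTable and a HUF_repeat flag : HUF_repeat_check
 * forces validation of the old table against the new histogram, while
 * HUF_repeat_valid (with preferRepeat) trusts it outright. Whenever a new table
 * is emitted, the function resets the flag and copies the new table into
 * hufTable (see the end of HUF_compress_internal()) :
 *
 *     HUF_CElt   hufTable[HUF_SYMBOLVALUE_MAX+1];   // persists across blocks
 *     HUF_repeat repeat = HUF_repeat_none;
 *     unsigned   wksp[HUF_WORKSPACE_SIZE_U32];
 *     size_t const cSize = HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
 *                                                255, 11,             // maxSymbolValue, huffLog
 *                                                wksp, sizeof(wksp),
 *                                                hufTable, &repeat,
 *                                                0, 0);               // preferRepeat, bmi2
 */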
0905