0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #include "zstd_compress_sequences.h"
0015
0016
0017
0018
0019
0020
0021 static unsigned const kInverseProbabilityLog256[256] = {
0022 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
0023 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
0024 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
0025 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
0026 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
0027 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
0028 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
0029 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
0030 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
0031 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
0032 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
0033 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
0034 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
0035 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
0036 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
0037 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
0038 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
0039 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
0040 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
0041 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
0042 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
0043 5, 4, 2, 1,
0044 };
0045
0046 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
0047 void const* ptr = ctable;
0048 U16 const* u16ptr = (U16 const*)ptr;
0049 U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
0050 return maxSymbolValue;
0051 }
0052
0053
0054
0055
0056
0057 static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
0058 {
0059
0060
0061
0062
0063 return nbSeq >= 2048;
0064 }
0065
0066
0067
0068
0069
0070 static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
0071 size_t const nbSeq, unsigned const FSELog)
0072 {
0073 BYTE wksp[FSE_NCOUNTBOUND];
0074 S16 norm[MaxSeq + 1];
0075 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
0076 FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
0077 return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
0078 }
0079
0080
0081
0082
0083
0084 static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
0085 {
0086 unsigned cost = 0;
0087 unsigned s;
0088 for (s = 0; s <= max; ++s) {
0089 unsigned norm = (unsigned)((256 * count[s]) / total);
0090 if (count[s] != 0 && norm == 0)
0091 norm = 1;
0092 assert(count[s] < total);
0093 cost += count[s] * kInverseProbabilityLog256[norm];
0094 }
0095 return cost >> 8;
0096 }
0097
0098
0099
0100
0101
0102 size_t ZSTD_fseBitCost(
0103 FSE_CTable const* ctable,
0104 unsigned const* count,
0105 unsigned const max)
0106 {
0107 unsigned const kAccuracyLog = 8;
0108 size_t cost = 0;
0109 unsigned s;
0110 FSE_CState_t cstate;
0111 FSE_initCState(&cstate, ctable);
0112 if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
0113 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
0114 ZSTD_getFSEMaxSymbolValue(ctable), max);
0115 return ERROR(GENERIC);
0116 }
0117 for (s = 0; s <= max; ++s) {
0118 unsigned const tableLog = cstate.stateLog;
0119 unsigned const badCost = (tableLog + 1) << kAccuracyLog;
0120 unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
0121 if (count[s] == 0)
0122 continue;
0123 if (bitCost >= badCost) {
0124 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
0125 return ERROR(GENERIC);
0126 }
0127 cost += (size_t)count[s] * bitCost;
0128 }
0129 return cost >> kAccuracyLog;
0130 }
0131
0132
0133
0134
0135
0136
0137 size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
0138 unsigned const* count, unsigned const max)
0139 {
0140 unsigned const shift = 8 - accuracyLog;
0141 size_t cost = 0;
0142 unsigned s;
0143 assert(accuracyLog <= 8);
0144 for (s = 0; s <= max; ++s) {
0145 unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
0146 unsigned const norm256 = normAcc << shift;
0147 assert(norm256 > 0);
0148 assert(norm256 < 256);
0149 cost += count[s] * kInverseProbabilityLog256[norm256];
0150 }
0151 return cost >> 8;
0152 }
0153
0154 symbolEncodingType_e
0155 ZSTD_selectEncodingType(
0156 FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
0157 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
0158 FSE_CTable const* prevCTable,
0159 short const* defaultNorm, U32 defaultNormLog,
0160 ZSTD_defaultPolicy_e const isDefaultAllowed,
0161 ZSTD_strategy const strategy)
0162 {
0163 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
0164 if (mostFrequent == nbSeq) {
0165 *repeatMode = FSE_repeat_none;
0166 if (isDefaultAllowed && nbSeq <= 2) {
0167
0168
0169
0170
0171 DEBUGLOG(5, "Selected set_basic");
0172 return set_basic;
0173 }
0174 DEBUGLOG(5, "Selected set_rle");
0175 return set_rle;
0176 }
0177 if (strategy < ZSTD_lazy) {
0178 if (isDefaultAllowed) {
0179 size_t const staticFse_nbSeq_max = 1000;
0180 size_t const mult = 10 - strategy;
0181 size_t const baseLog = 3;
0182 size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;
0183 assert(defaultNormLog >= 5 && defaultNormLog <= 6);
0184 assert(mult <= 9 && mult >= 7);
0185 if ( (*repeatMode == FSE_repeat_valid)
0186 && (nbSeq < staticFse_nbSeq_max) ) {
0187 DEBUGLOG(5, "Selected set_repeat");
0188 return set_repeat;
0189 }
0190 if ( (nbSeq < dynamicFse_nbSeq_min)
0191 || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
0192 DEBUGLOG(5, "Selected set_basic");
0193
0194
0195
0196
0197
0198
0199 *repeatMode = FSE_repeat_none;
0200 return set_basic;
0201 }
0202 }
0203 } else {
0204 size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
0205 size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
0206 size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
0207 size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
0208
0209 if (isDefaultAllowed) {
0210 assert(!ZSTD_isError(basicCost));
0211 assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
0212 }
0213 assert(!ZSTD_isError(NCountCost));
0214 assert(compressedCost < ERROR(maxCode));
0215 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
0216 (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
0217 if (basicCost <= repeatCost && basicCost <= compressedCost) {
0218 DEBUGLOG(5, "Selected set_basic");
0219 assert(isDefaultAllowed);
0220 *repeatMode = FSE_repeat_none;
0221 return set_basic;
0222 }
0223 if (repeatCost <= compressedCost) {
0224 DEBUGLOG(5, "Selected set_repeat");
0225 assert(!ZSTD_isError(repeatCost));
0226 return set_repeat;
0227 }
0228 assert(compressedCost < basicCost && compressedCost < repeatCost);
0229 }
0230 DEBUGLOG(5, "Selected set_compressed");
0231 *repeatMode = FSE_repeat_check;
0232 return set_compressed;
0233 }
0234
0235 typedef struct {
0236 S16 norm[MaxSeq + 1];
0237 U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
0238 } ZSTD_BuildCTableWksp;
0239
0240 size_t
0241 ZSTD_buildCTable(void* dst, size_t dstCapacity,
0242 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
0243 unsigned* count, U32 max,
0244 const BYTE* codeTable, size_t nbSeq,
0245 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
0246 const FSE_CTable* prevCTable, size_t prevCTableSize,
0247 void* entropyWorkspace, size_t entropyWorkspaceSize)
0248 {
0249 BYTE* op = (BYTE*)dst;
0250 const BYTE* const oend = op + dstCapacity;
0251 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
0252
0253 switch (type) {
0254 case set_rle:
0255 FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
0256 RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
0257 *op = codeTable[0];
0258 return 1;
0259 case set_repeat:
0260 ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
0261 return 0;
0262 case set_basic:
0263 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");
0264 return 0;
0265 case set_compressed: {
0266 ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
0267 size_t nbSeq_1 = nbSeq;
0268 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
0269 if (count[codeTable[nbSeq-1]] > 1) {
0270 count[codeTable[nbSeq-1]]--;
0271 nbSeq_1--;
0272 }
0273 assert(nbSeq_1 > 1);
0274 assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
0275 (void)entropyWorkspaceSize;
0276 FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
0277 { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);
0278 FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
0279 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
0280 return NCountSize;
0281 }
0282 }
0283 default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
0284 }
0285 }
0286
0287 FORCE_INLINE_TEMPLATE size_t
0288 ZSTD_encodeSequences_body(
0289 void* dst, size_t dstCapacity,
0290 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0291 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0292 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0293 seqDef const* sequences, size_t nbSeq, int longOffsets)
0294 {
0295 BIT_CStream_t blockStream;
0296 FSE_CState_t stateMatchLength;
0297 FSE_CState_t stateOffsetBits;
0298 FSE_CState_t stateLitLength;
0299
0300 RETURN_ERROR_IF(
0301 ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
0302 dstSize_tooSmall, "not enough space remaining");
0303 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
0304 (int)(blockStream.endPtr - blockStream.startPtr),
0305 (unsigned)dstCapacity);
0306
0307
0308 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
0309 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
0310 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
0311 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
0312 if (MEM_32bits()) BIT_flushBits(&blockStream);
0313 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
0314 if (MEM_32bits()) BIT_flushBits(&blockStream);
0315 if (longOffsets) {
0316 U32 const ofBits = ofCodeTable[nbSeq-1];
0317 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
0318 if (extraBits) {
0319 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
0320 BIT_flushBits(&blockStream);
0321 }
0322 BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
0323 ofBits - extraBits);
0324 } else {
0325 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
0326 }
0327 BIT_flushBits(&blockStream);
0328
0329 { size_t n;
0330 for (n=nbSeq-2 ; n<nbSeq ; n--) {
0331 BYTE const llCode = llCodeTable[n];
0332 BYTE const ofCode = ofCodeTable[n];
0333 BYTE const mlCode = mlCodeTable[n];
0334 U32 const llBits = LL_bits[llCode];
0335 U32 const ofBits = ofCode;
0336 U32 const mlBits = ML_bits[mlCode];
0337 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
0338 (unsigned)sequences[n].litLength,
0339 (unsigned)sequences[n].matchLength + MINMATCH,
0340 (unsigned)sequences[n].offset);
0341
0342
0343 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);
0344 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);
0345 if (MEM_32bits()) BIT_flushBits(&blockStream);
0346 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);
0347 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
0348 BIT_flushBits(&blockStream);
0349 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
0350 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
0351 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
0352 if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
0353 if (longOffsets) {
0354 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
0355 if (extraBits) {
0356 BIT_addBits(&blockStream, sequences[n].offset, extraBits);
0357 BIT_flushBits(&blockStream);
0358 }
0359 BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
0360 ofBits - extraBits);
0361 } else {
0362 BIT_addBits(&blockStream, sequences[n].offset, ofBits);
0363 }
0364 BIT_flushBits(&blockStream);
0365 DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
0366 } }
0367
0368 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
0369 FSE_flushCState(&blockStream, &stateMatchLength);
0370 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
0371 FSE_flushCState(&blockStream, &stateOffsetBits);
0372 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
0373 FSE_flushCState(&blockStream, &stateLitLength);
0374
0375 { size_t const streamSize = BIT_closeCStream(&blockStream);
0376 RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
0377 return streamSize;
0378 }
0379 }
0380
0381 static size_t
0382 ZSTD_encodeSequences_default(
0383 void* dst, size_t dstCapacity,
0384 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0385 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0386 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0387 seqDef const* sequences, size_t nbSeq, int longOffsets)
0388 {
0389 return ZSTD_encodeSequences_body(dst, dstCapacity,
0390 CTable_MatchLength, mlCodeTable,
0391 CTable_OffsetBits, ofCodeTable,
0392 CTable_LitLength, llCodeTable,
0393 sequences, nbSeq, longOffsets);
0394 }
0395
0396
0397 #if DYNAMIC_BMI2
0398
0399 static TARGET_ATTRIBUTE("bmi2") size_t
0400 ZSTD_encodeSequences_bmi2(
0401 void* dst, size_t dstCapacity,
0402 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0403 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0404 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0405 seqDef const* sequences, size_t nbSeq, int longOffsets)
0406 {
0407 return ZSTD_encodeSequences_body(dst, dstCapacity,
0408 CTable_MatchLength, mlCodeTable,
0409 CTable_OffsetBits, ofCodeTable,
0410 CTable_LitLength, llCodeTable,
0411 sequences, nbSeq, longOffsets);
0412 }
0413
0414 #endif
0415
0416 size_t ZSTD_encodeSequences(
0417 void* dst, size_t dstCapacity,
0418 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
0419 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
0420 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
0421 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
0422 {
0423 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
0424 #if DYNAMIC_BMI2
0425 if (bmi2) {
0426 return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
0427 CTable_MatchLength, mlCodeTable,
0428 CTable_OffsetBits, ofCodeTable,
0429 CTable_LitLength, llCodeTable,
0430 sequences, nbSeq, longOffsets);
0431 }
0432 #endif
0433 (void)bmi2;
0434 return ZSTD_encodeSequences_default(dst, dstCapacity,
0435 CTable_MatchLength, mlCodeTable,
0436 CTable_OffsetBits, ofCodeTable,
0437 CTable_LitLength, llCodeTable,
0438 sequences, nbSeq, longOffsets);
0439 }