0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #include "zstd_compress_internal.h"
0012 #include "zstd_lazy.h"
0013
0014
0015
0016
0017
0018
0019 static void
0020 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
0021 const BYTE* ip, const BYTE* iend,
0022 U32 mls)
0023 {
0024 const ZSTD_compressionParameters* const cParams = &ms->cParams;
0025 U32* const hashTable = ms->hashTable;
0026 U32 const hashLog = cParams->hashLog;
0027
0028 U32* const bt = ms->chainTable;
0029 U32 const btLog = cParams->chainLog - 1;
0030 U32 const btMask = (1 << btLog) - 1;
0031
0032 const BYTE* const base = ms->window.base;
0033 U32 const target = (U32)(ip - base);
0034 U32 idx = ms->nextToUpdate;
0035
0036 if (idx != target)
0037 DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
0038 idx, target, ms->window.dictLimit);
0039 assert(ip + 8 <= iend);
0040 (void)iend;
0041
0042 assert(idx >= ms->window.dictLimit);
0043 for ( ; idx < target ; idx++) {
0044 size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);
0045 U32 const matchIndex = hashTable[h];
0046
0047 U32* const nextCandidatePtr = bt + 2*(idx&btMask);
0048 U32* const sortMarkPtr = nextCandidatePtr + 1;
0049
0050 DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
0051 hashTable[h] = idx;
0052 *nextCandidatePtr = matchIndex;
0053 *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
0054 }
0055 ms->nextToUpdate = target;
0056 }
0057
0058
0059
0060
0061
0062
0063 static void
0064 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
0065 U32 curr, const BYTE* inputEnd,
0066 U32 nbCompares, U32 btLow,
0067 const ZSTD_dictMode_e dictMode)
0068 {
0069 const ZSTD_compressionParameters* const cParams = &ms->cParams;
0070 U32* const bt = ms->chainTable;
0071 U32 const btLog = cParams->chainLog - 1;
0072 U32 const btMask = (1 << btLog) - 1;
0073 size_t commonLengthSmaller=0, commonLengthLarger=0;
0074 const BYTE* const base = ms->window.base;
0075 const BYTE* const dictBase = ms->window.dictBase;
0076 const U32 dictLimit = ms->window.dictLimit;
0077 const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
0078 const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
0079 const BYTE* const dictEnd = dictBase + dictLimit;
0080 const BYTE* const prefixStart = base + dictLimit;
0081 const BYTE* match;
0082 U32* smallerPtr = bt + 2*(curr&btMask);
0083 U32* largerPtr = smallerPtr + 1;
0084 U32 matchIndex = *smallerPtr;
0085 U32 dummy32;
0086 U32 const windowValid = ms->window.lowLimit;
0087 U32 const maxDistance = 1U << cParams->windowLog;
0088 U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
0089
0090
0091 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
0092 curr, dictLimit, windowLow);
0093 assert(curr >= btLow);
0094 assert(ip < iend);
0095
0096 for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
0097 U32* const nextPtr = bt + 2*(matchIndex & btMask);
0098 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
0099 assert(matchIndex < curr);
0100
0101
0102
0103
0104 if ( (dictMode != ZSTD_extDict)
0105 || (matchIndex+matchLength >= dictLimit)
0106 || (curr < dictLimit) ) {
0107 const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
0108 || (matchIndex+matchLength >= dictLimit)) ?
0109 base : dictBase;
0110 assert( (matchIndex+matchLength >= dictLimit)
0111 || (curr < dictLimit) );
0112 match = mBase + matchIndex;
0113 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
0114 } else {
0115 match = dictBase + matchIndex;
0116 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
0117 if (matchIndex+matchLength >= dictLimit)
0118 match = base + matchIndex;
0119 }
0120
0121 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
0122 curr, matchIndex, (U32)matchLength);
0123
0124 if (ip+matchLength == iend) {
0125 break;
0126 }
0127
0128 if (match[matchLength] < ip[matchLength]) {
0129
0130 *smallerPtr = matchIndex;
0131 commonLengthSmaller = matchLength;
0132 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
0133 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
0134 matchIndex, btLow, nextPtr[1]);
0135 smallerPtr = nextPtr+1;
0136 matchIndex = nextPtr[1];
0137 } else {
0138
0139 *largerPtr = matchIndex;
0140 commonLengthLarger = matchLength;
0141 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
0142 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
0143 matchIndex, btLow, nextPtr[0]);
0144 largerPtr = nextPtr;
0145 matchIndex = nextPtr[0];
0146 } }
0147
0148 *smallerPtr = *largerPtr = 0;
0149 }
0150
0151
0152 static size_t
0153 ZSTD_DUBT_findBetterDictMatch (
0154 ZSTD_matchState_t* ms,
0155 const BYTE* const ip, const BYTE* const iend,
0156 size_t* offsetPtr,
0157 size_t bestLength,
0158 U32 nbCompares,
0159 U32 const mls,
0160 const ZSTD_dictMode_e dictMode)
0161 {
0162 const ZSTD_matchState_t * const dms = ms->dictMatchState;
0163 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
0164 const U32 * const dictHashTable = dms->hashTable;
0165 U32 const hashLog = dmsCParams->hashLog;
0166 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
0167 U32 dictMatchIndex = dictHashTable[h];
0168
0169 const BYTE* const base = ms->window.base;
0170 const BYTE* const prefixStart = base + ms->window.dictLimit;
0171 U32 const curr = (U32)(ip-base);
0172 const BYTE* const dictBase = dms->window.base;
0173 const BYTE* const dictEnd = dms->window.nextSrc;
0174 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
0175 U32 const dictLowLimit = dms->window.lowLimit;
0176 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
0177
0178 U32* const dictBt = dms->chainTable;
0179 U32 const btLog = dmsCParams->chainLog - 1;
0180 U32 const btMask = (1 << btLog) - 1;
0181 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
0182
0183 size_t commonLengthSmaller=0, commonLengthLarger=0;
0184
0185 (void)dictMode;
0186 assert(dictMode == ZSTD_dictMatchState);
0187
0188 for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
0189 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
0190 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
0191 const BYTE* match = dictBase + dictMatchIndex;
0192 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
0193 if (dictMatchIndex+matchLength >= dictHighLimit)
0194 match = base + dictMatchIndex + dictIndexDelta;
0195
0196 if (matchLength > bestLength) {
0197 U32 matchIndex = dictMatchIndex + dictIndexDelta;
0198 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
0199 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
0200 curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
0201 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
0202 }
0203 if (ip+matchLength == iend) {
0204 break;
0205 }
0206 }
0207
0208 if (match[matchLength] < ip[matchLength]) {
0209 if (dictMatchIndex <= btLow) { break; }
0210 commonLengthSmaller = matchLength;
0211 dictMatchIndex = nextPtr[1];
0212 } else {
0213
0214 if (dictMatchIndex <= btLow) { break; }
0215 commonLengthLarger = matchLength;
0216 dictMatchIndex = nextPtr[0];
0217 }
0218 }
0219
0220 if (bestLength >= MINMATCH) {
0221 U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
0222 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
0223 curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
0224 }
0225 return bestLength;
0226
0227 }
0228
0229
0230 static size_t
0231 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
0232 const BYTE* const ip, const BYTE* const iend,
0233 size_t* offsetPtr,
0234 U32 const mls,
0235 const ZSTD_dictMode_e dictMode)
0236 {
0237 const ZSTD_compressionParameters* const cParams = &ms->cParams;
0238 U32* const hashTable = ms->hashTable;
0239 U32 const hashLog = cParams->hashLog;
0240 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
0241 U32 matchIndex = hashTable[h];
0242
0243 const BYTE* const base = ms->window.base;
0244 U32 const curr = (U32)(ip-base);
0245 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
0246
0247 U32* const bt = ms->chainTable;
0248 U32 const btLog = cParams->chainLog - 1;
0249 U32 const btMask = (1 << btLog) - 1;
0250 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
0251 U32 const unsortLimit = MAX(btLow, windowLow);
0252
0253 U32* nextCandidate = bt + 2*(matchIndex&btMask);
0254 U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
0255 U32 nbCompares = 1U << cParams->searchLog;
0256 U32 nbCandidates = nbCompares;
0257 U32 previousCandidate = 0;
0258
0259 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
0260 assert(ip <= iend-8);
0261 assert(dictMode != ZSTD_dedicatedDictSearch);
0262
0263
0264 while ( (matchIndex > unsortLimit)
0265 && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
0266 && (nbCandidates > 1) ) {
0267 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
0268 matchIndex);
0269 *unsortedMark = previousCandidate;
0270 previousCandidate = matchIndex;
0271 matchIndex = *nextCandidate;
0272 nextCandidate = bt + 2*(matchIndex&btMask);
0273 unsortedMark = bt + 2*(matchIndex&btMask) + 1;
0274 nbCandidates --;
0275 }
0276
0277
0278
0279 if ( (matchIndex > unsortLimit)
0280 && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
0281 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
0282 matchIndex);
0283 *nextCandidate = *unsortedMark = 0;
0284 }
0285
0286
0287 matchIndex = previousCandidate;
0288 while (matchIndex) {
0289 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
0290 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
0291 ZSTD_insertDUBT1(ms, matchIndex, iend,
0292 nbCandidates, unsortLimit, dictMode);
0293 matchIndex = nextCandidateIdx;
0294 nbCandidates++;
0295 }
0296
0297
0298 { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
0299 const BYTE* const dictBase = ms->window.dictBase;
0300 const U32 dictLimit = ms->window.dictLimit;
0301 const BYTE* const dictEnd = dictBase + dictLimit;
0302 const BYTE* const prefixStart = base + dictLimit;
0303 U32* smallerPtr = bt + 2*(curr&btMask);
0304 U32* largerPtr = bt + 2*(curr&btMask) + 1;
0305 U32 matchEndIdx = curr + 8 + 1;
0306 U32 dummy32;
0307 size_t bestLength = 0;
0308
0309 matchIndex = hashTable[h];
0310 hashTable[h] = curr;
0311
0312 for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
0313 U32* const nextPtr = bt + 2*(matchIndex & btMask);
0314 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);
0315 const BYTE* match;
0316
0317 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
0318 match = base + matchIndex;
0319 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
0320 } else {
0321 match = dictBase + matchIndex;
0322 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
0323 if (matchIndex+matchLength >= dictLimit)
0324 match = base + matchIndex;
0325 }
0326
0327 if (matchLength > bestLength) {
0328 if (matchLength > matchEndIdx - matchIndex)
0329 matchEndIdx = matchIndex + (U32)matchLength;
0330 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
0331 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
0332 if (ip+matchLength == iend) {
0333 if (dictMode == ZSTD_dictMatchState) {
0334 nbCompares = 0;
0335
0336
0337 }
0338 break;
0339 }
0340 }
0341
0342 if (match[matchLength] < ip[matchLength]) {
0343
0344 *smallerPtr = matchIndex;
0345 commonLengthSmaller = matchLength;
0346 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }
0347 smallerPtr = nextPtr+1;
0348 matchIndex = nextPtr[1];
0349 } else {
0350
0351 *largerPtr = matchIndex;
0352 commonLengthLarger = matchLength;
0353 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }
0354 largerPtr = nextPtr;
0355 matchIndex = nextPtr[0];
0356 } }
0357
0358 *smallerPtr = *largerPtr = 0;
0359
0360 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX));
0361 if (dictMode == ZSTD_dictMatchState && nbCompares) {
0362 bestLength = ZSTD_DUBT_findBetterDictMatch(
0363 ms, ip, iend,
0364 offsetPtr, bestLength, nbCompares,
0365 mls, dictMode);
0366 }
0367
0368 assert(matchEndIdx > curr+8);
0369 ms->nextToUpdate = matchEndIdx - 8;
0370 if (bestLength >= MINMATCH) {
0371 U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
0372 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
0373 curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
0374 }
0375 return bestLength;
0376 }
0377 }
0378
0379
0380
0381 FORCE_INLINE_TEMPLATE size_t
0382 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
0383 const BYTE* const ip, const BYTE* const iLimit,
0384 size_t* offsetPtr,
0385 const U32 mls ,
0386 const ZSTD_dictMode_e dictMode)
0387 {
0388 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
0389 if (ip < ms->window.base + ms->nextToUpdate) return 0;
0390 ZSTD_updateDUBT(ms, ip, iLimit, mls);
0391 return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
0392 }
0393
0394
0395 static size_t
0396 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
0397 const BYTE* ip, const BYTE* const iLimit,
0398 size_t* offsetPtr)
0399 {
0400 switch(ms->cParams.minMatch)
0401 {
0402 default :
0403 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
0404 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
0405 case 7 :
0406 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
0407 }
0408 }
0409
0410
0411 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
0412 ZSTD_matchState_t* ms,
0413 const BYTE* ip, const BYTE* const iLimit,
0414 size_t* offsetPtr)
0415 {
0416 switch(ms->cParams.minMatch)
0417 {
0418 default :
0419 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
0420 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
0421 case 7 :
0422 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
0423 }
0424 }
0425
0426
0427 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
0428 ZSTD_matchState_t* ms,
0429 const BYTE* ip, const BYTE* const iLimit,
0430 size_t* offsetPtr)
0431 {
0432 switch(ms->cParams.minMatch)
0433 {
0434 default :
0435 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
0436 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
0437 case 7 :
0438 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
0439 }
0440 }
0441
0442
0443
0444
0445
0446
0447 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
0448
0449
0450
0451 FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
0452 ZSTD_matchState_t* ms,
0453 const ZSTD_compressionParameters* const cParams,
0454 const BYTE* ip, U32 const mls)
0455 {
0456 U32* const hashTable = ms->hashTable;
0457 const U32 hashLog = cParams->hashLog;
0458 U32* const chainTable = ms->chainTable;
0459 const U32 chainMask = (1 << cParams->chainLog) - 1;
0460 const BYTE* const base = ms->window.base;
0461 const U32 target = (U32)(ip - base);
0462 U32 idx = ms->nextToUpdate;
0463
0464 while(idx < target) {
0465 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
0466 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
0467 hashTable[h] = idx;
0468 idx++;
0469 }
0470
0471 ms->nextToUpdate = target;
0472 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
0473 }
0474
0475 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
0476 const ZSTD_compressionParameters* const cParams = &ms->cParams;
0477 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
0478 }
0479
0480 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
0481 {
0482 const BYTE* const base = ms->window.base;
0483 U32 const target = (U32)(ip - base);
0484 U32* const hashTable = ms->hashTable;
0485 U32* const chainTable = ms->chainTable;
0486 U32 const chainSize = 1 << ms->cParams.chainLog;
0487 U32 idx = ms->nextToUpdate;
0488 U32 const minChain = chainSize < target ? target - chainSize : idx;
0489 U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
0490 U32 const cacheSize = bucketSize - 1;
0491 U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
0492 U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
0493
0494
0495
0496
0497
0498
0499 U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
0500 U32* const tmpHashTable = hashTable;
0501 U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
0502 U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
0503 U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
0504
0505 U32 hashIdx;
0506
0507 assert(ms->cParams.chainLog <= 24);
0508 assert(ms->cParams.hashLog >= ms->cParams.chainLog);
0509 assert(idx != 0);
0510 assert(tmpMinChain <= minChain);
0511
0512
0513 for ( ; idx < target; idx++) {
0514 U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
0515 if (idx >= tmpMinChain) {
0516 tmpChainTable[idx - tmpMinChain] = hashTable[h];
0517 }
0518 tmpHashTable[h] = idx;
0519 }
0520
0521
0522 {
0523 U32 chainPos = 0;
0524 for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
0525 U32 count;
0526 U32 countBeyondMinChain = 0;
0527 U32 i = tmpHashTable[hashIdx];
0528 for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
0529
0530
0531 if (i < minChain) {
0532 countBeyondMinChain++;
0533 }
0534 i = tmpChainTable[i - tmpMinChain];
0535 }
0536 if (count == cacheSize) {
0537 for (count = 0; count < chainLimit;) {
0538 if (i < minChain) {
0539 if (!i || countBeyondMinChain++ > cacheSize) {
0540
0541
0542
0543
0544
0545
0546
0547
0548 break;
0549 }
0550 }
0551 chainTable[chainPos++] = i;
0552 count++;
0553 if (i < tmpMinChain) {
0554 break;
0555 }
0556 i = tmpChainTable[i - tmpMinChain];
0557 }
0558 } else {
0559 count = 0;
0560 }
0561 if (count) {
0562 tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
0563 } else {
0564 tmpHashTable[hashIdx] = 0;
0565 }
0566 }
0567 assert(chainPos <= chainSize);
0568 }
0569
0570
0571 for (hashIdx = (1 << hashLog); hashIdx; ) {
0572 U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
0573 U32 const chainPackedPointer = tmpHashTable[hashIdx];
0574 U32 i;
0575 for (i = 0; i < cacheSize; i++) {
0576 hashTable[bucketIdx + i] = 0;
0577 }
0578 hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
0579 }
0580
0581
0582 for (idx = ms->nextToUpdate; idx < target; idx++) {
0583 U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
0584 << ZSTD_LAZY_DDSS_BUCKET_LOG;
0585 U32 i;
0586
0587 for (i = cacheSize - 1; i; i--)
0588 hashTable[h + i] = hashTable[h + i - 1];
0589 hashTable[h] = idx;
0590 }
0591
0592 ms->nextToUpdate = target;
0593 }
0594
0595
0596
0597 FORCE_INLINE_TEMPLATE
0598 size_t ZSTD_HcFindBestMatch_generic (
0599 ZSTD_matchState_t* ms,
0600 const BYTE* const ip, const BYTE* const iLimit,
0601 size_t* offsetPtr,
0602 const U32 mls, const ZSTD_dictMode_e dictMode)
0603 {
0604 const ZSTD_compressionParameters* const cParams = &ms->cParams;
0605 U32* const chainTable = ms->chainTable;
0606 const U32 chainSize = (1 << cParams->chainLog);
0607 const U32 chainMask = chainSize-1;
0608 const BYTE* const base = ms->window.base;
0609 const BYTE* const dictBase = ms->window.dictBase;
0610 const U32 dictLimit = ms->window.dictLimit;
0611 const BYTE* const prefixStart = base + dictLimit;
0612 const BYTE* const dictEnd = dictBase + dictLimit;
0613 const U32 curr = (U32)(ip-base);
0614 const U32 maxDistance = 1U << cParams->windowLog;
0615 const U32 lowestValid = ms->window.lowLimit;
0616 const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
0617 const U32 isDictionary = (ms->loadedDictEnd != 0);
0618 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
0619 const U32 minChain = curr > chainSize ? curr - chainSize : 0;
0620 U32 nbAttempts = 1U << cParams->searchLog;
0621 size_t ml=4-1;
0622
0623 const ZSTD_matchState_t* const dms = ms->dictMatchState;
0624 const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
0625 ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
0626 const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
0627 ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
0628
0629 U32 matchIndex;
0630
0631 if (dictMode == ZSTD_dedicatedDictSearch) {
0632 const U32* entry = &dms->hashTable[ddsIdx];
0633 PREFETCH_L1(entry);
0634 }
0635
0636
0637 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
0638
0639 for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
0640 size_t currentMl=0;
0641 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
0642 const BYTE* const match = base + matchIndex;
0643 assert(matchIndex >= dictLimit);
0644 if (match[ml] == ip[ml])
0645 currentMl = ZSTD_count(ip, match, iLimit);
0646 } else {
0647 const BYTE* const match = dictBase + matchIndex;
0648 assert(match+4 <= dictEnd);
0649 if (MEM_read32(match) == MEM_read32(ip))
0650 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
0651 }
0652
0653
0654 if (currentMl > ml) {
0655 ml = currentMl;
0656 *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
0657 if (ip+currentMl == iLimit) break;
0658 }
0659
0660 if (matchIndex <= minChain) break;
0661 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
0662 }
0663
0664 assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX));
0665 if (dictMode == ZSTD_dedicatedDictSearch) {
0666 const U32 ddsLowestIndex = dms->window.dictLimit;
0667 const BYTE* const ddsBase = dms->window.base;
0668 const BYTE* const ddsEnd = dms->window.nextSrc;
0669 const U32 ddsSize = (U32)(ddsEnd - ddsBase);
0670 const U32 ddsIndexDelta = dictLimit - ddsSize;
0671 const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
0672 const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
0673 U32 ddsAttempt;
0674
0675 for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
0676 PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
0677 }
0678
0679 {
0680 U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
0681 U32 const chainIndex = chainPackedPointer >> 8;
0682
0683 PREFETCH_L1(&dms->chainTable[chainIndex]);
0684 }
0685
0686 for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
0687 size_t currentMl=0;
0688 const BYTE* match;
0689 matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
0690 match = ddsBase + matchIndex;
0691
0692 if (!matchIndex) {
0693 return ml;
0694 }
0695
0696
0697 (void)ddsLowestIndex;
0698 assert(matchIndex >= ddsLowestIndex);
0699 assert(match+4 <= ddsEnd);
0700 if (MEM_read32(match) == MEM_read32(ip)) {
0701
0702 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
0703 }
0704
0705
0706 if (currentMl > ml) {
0707 ml = currentMl;
0708 *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
0709 if (ip+currentMl == iLimit) {
0710
0711 return ml;
0712 }
0713 }
0714 }
0715
0716 {
0717 U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
0718 U32 chainIndex = chainPackedPointer >> 8;
0719 U32 const chainLength = chainPackedPointer & 0xFF;
0720 U32 const chainAttempts = nbAttempts - ddsAttempt;
0721 U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
0722 U32 chainAttempt;
0723
0724 for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
0725 PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
0726 }
0727
0728 for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
0729 size_t currentMl=0;
0730 const BYTE* match;
0731 matchIndex = dms->chainTable[chainIndex];
0732 match = ddsBase + matchIndex;
0733
0734
0735 assert(matchIndex >= ddsLowestIndex);
0736 assert(match+4 <= ddsEnd);
0737 if (MEM_read32(match) == MEM_read32(ip)) {
0738
0739 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
0740 }
0741
0742
0743 if (currentMl > ml) {
0744 ml = currentMl;
0745 *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
0746 if (ip+currentMl == iLimit) break;
0747 }
0748 }
0749 }
0750 } else if (dictMode == ZSTD_dictMatchState) {
0751 const U32* const dmsChainTable = dms->chainTable;
0752 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
0753 const U32 dmsChainMask = dmsChainSize - 1;
0754 const U32 dmsLowestIndex = dms->window.dictLimit;
0755 const BYTE* const dmsBase = dms->window.base;
0756 const BYTE* const dmsEnd = dms->window.nextSrc;
0757 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
0758 const U32 dmsIndexDelta = dictLimit - dmsSize;
0759 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
0760
0761 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
0762
0763 for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
0764 size_t currentMl=0;
0765 const BYTE* const match = dmsBase + matchIndex;
0766 assert(match+4 <= dmsEnd);
0767 if (MEM_read32(match) == MEM_read32(ip))
0768 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
0769
0770
0771 if (currentMl > ml) {
0772 ml = currentMl;
0773 *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
0774 if (ip+currentMl == iLimit) break;
0775 }
0776
0777 if (matchIndex <= dmsMinChain) break;
0778
0779 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
0780 }
0781 }
0782
0783 return ml;
0784 }
0785
0786
0787 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
0788 ZSTD_matchState_t* ms,
0789 const BYTE* ip, const BYTE* const iLimit,
0790 size_t* offsetPtr)
0791 {
0792 switch(ms->cParams.minMatch)
0793 {
0794 default :
0795 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
0796 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
0797 case 7 :
0798 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
0799 }
0800 }
0801
0802
0803 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
0804 ZSTD_matchState_t* ms,
0805 const BYTE* ip, const BYTE* const iLimit,
0806 size_t* offsetPtr)
0807 {
0808 switch(ms->cParams.minMatch)
0809 {
0810 default :
0811 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
0812 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
0813 case 7 :
0814 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
0815 }
0816 }
0817
0818
0819 static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
0820 ZSTD_matchState_t* ms,
0821 const BYTE* ip, const BYTE* const iLimit,
0822 size_t* offsetPtr)
0823 {
0824 switch(ms->cParams.minMatch)
0825 {
0826 default :
0827 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
0828 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
0829 case 7 :
0830 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
0831 }
0832 }
0833
0834
0835 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
0836 ZSTD_matchState_t* ms,
0837 const BYTE* ip, const BYTE* const iLimit,
0838 size_t* offsetPtr)
0839 {
0840 switch(ms->cParams.minMatch)
0841 {
0842 default :
0843 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
0844 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
0845 case 7 :
0846 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
0847 }
0848 }
0849
0850
0851
0852
0853
0854 typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
0855
0856 FORCE_INLINE_TEMPLATE size_t
0857 ZSTD_compressBlock_lazy_generic(
0858 ZSTD_matchState_t* ms, seqStore_t* seqStore,
0859 U32 rep[ZSTD_REP_NUM],
0860 const void* src, size_t srcSize,
0861 const searchMethod_e searchMethod, const U32 depth,
0862 ZSTD_dictMode_e const dictMode)
0863 {
0864 const BYTE* const istart = (const BYTE*)src;
0865 const BYTE* ip = istart;
0866 const BYTE* anchor = istart;
0867 const BYTE* const iend = istart + srcSize;
0868 const BYTE* const ilimit = iend - 8;
0869 const BYTE* const base = ms->window.base;
0870 const U32 prefixLowestIndex = ms->window.dictLimit;
0871 const BYTE* const prefixLowest = base + prefixLowestIndex;
0872
0873 typedef size_t (*searchMax_f)(
0874 ZSTD_matchState_t* ms,
0875 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
0876
0877
0878
0879
0880
0881
0882
0883 const searchMax_f searchFuncs[4][2] = {
0884 {
0885 ZSTD_HcFindBestMatch_selectMLS,
0886 ZSTD_BtFindBestMatch_selectMLS
0887 },
0888 {
0889 NULL,
0890 NULL
0891 },
0892 {
0893 ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
0894 ZSTD_BtFindBestMatch_dictMatchState_selectMLS
0895 },
0896 {
0897 ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
0898 NULL
0899 }
0900 };
0901
0902 searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
0903 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
0904
0905 const int isDMS = dictMode == ZSTD_dictMatchState;
0906 const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
0907 const int isDxS = isDMS || isDDS;
0908 const ZSTD_matchState_t* const dms = ms->dictMatchState;
0909 const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
0910 const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
0911 const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
0912 const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
0913 const U32 dictIndexDelta = isDxS ?
0914 prefixLowestIndex - (U32)(dictEnd - dictBase) :
0915 0;
0916 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
0917
0918 assert(searchMax != NULL);
0919
0920 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
0921
0922
0923 ip += (dictAndPrefixLength == 0);
0924 if (dictMode == ZSTD_noDict) {
0925 U32 const curr = (U32)(ip - base);
0926 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
0927 U32 const maxRep = curr - windowLow;
0928 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
0929 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
0930 }
0931 if (isDxS) {
0932
0933
0934 assert(offset_1 <= dictAndPrefixLength);
0935 assert(offset_2 <= dictAndPrefixLength);
0936 }
0937
0938
0939 #if defined(__x86_64__)
0940
0941
0942
0943 __asm__(".p2align 5");
0944 #endif
0945 while (ip < ilimit) {
0946 size_t matchLength=0;
0947 size_t offset=0;
0948 const BYTE* start=ip+1;
0949
0950
0951 if (isDxS) {
0952 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
0953 const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
0954 && repIndex < prefixLowestIndex) ?
0955 dictBase + (repIndex - dictIndexDelta) :
0956 base + repIndex;
0957 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 )
0958 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
0959 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
0960 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
0961 if (depth==0) goto _storeSequence;
0962 }
0963 }
0964 if ( dictMode == ZSTD_noDict
0965 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
0966 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
0967 if (depth==0) goto _storeSequence;
0968 }
0969
0970
0971 { size_t offsetFound = 999999999;
0972 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
0973 if (ml2 > matchLength)
0974 matchLength = ml2, start = ip, offset=offsetFound;
0975 }
0976
0977 if (matchLength < 4) {
0978 ip += ((ip-anchor) >> kSearchStrength) + 1;
0979 continue;
0980 }
0981
0982
0983 if (depth>=1)
0984 while (ip<ilimit) {
0985 ip ++;
0986 if ( (dictMode == ZSTD_noDict)
0987 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
0988 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
0989 int const gain2 = (int)(mlRep * 3);
0990 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
0991 if ((mlRep >= 4) && (gain2 > gain1))
0992 matchLength = mlRep, offset = 0, start = ip;
0993 }
0994 if (isDxS) {
0995 const U32 repIndex = (U32)(ip - base) - offset_1;
0996 const BYTE* repMatch = repIndex < prefixLowestIndex ?
0997 dictBase + (repIndex - dictIndexDelta) :
0998 base + repIndex;
0999 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 )
1000 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1001 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1002 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1003 int const gain2 = (int)(mlRep * 3);
1004 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1005 if ((mlRep >= 4) && (gain2 > gain1))
1006 matchLength = mlRep, offset = 0, start = ip;
1007 }
1008 }
1009 { size_t offset2=999999999;
1010 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1011 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));
1012 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1013 if ((ml2 >= 4) && (gain2 > gain1)) {
1014 matchLength = ml2, offset = offset2, start = ip;
1015 continue;
1016 } }
1017
1018
1019 if ((depth==2) && (ip<ilimit)) {
1020 ip ++;
1021 if ( (dictMode == ZSTD_noDict)
1022 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1023 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
1024 int const gain2 = (int)(mlRep * 4);
1025 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1026 if ((mlRep >= 4) && (gain2 > gain1))
1027 matchLength = mlRep, offset = 0, start = ip;
1028 }
1029 if (isDxS) {
1030 const U32 repIndex = (U32)(ip - base) - offset_1;
1031 const BYTE* repMatch = repIndex < prefixLowestIndex ?
1032 dictBase + (repIndex - dictIndexDelta) :
1033 base + repIndex;
1034 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 )
1035 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1036 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1037 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1038 int const gain2 = (int)(mlRep * 4);
1039 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1040 if ((mlRep >= 4) && (gain2 > gain1))
1041 matchLength = mlRep, offset = 0, start = ip;
1042 }
1043 }
1044 { size_t offset2=999999999;
1045 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1046 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));
1047 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1048 if ((ml2 >= 4) && (gain2 > gain1)) {
1049 matchLength = ml2, offset = offset2, start = ip;
1050 continue;
1051 } } }
1052 break;
1053 }
1054
1055
1056
1057
1058
1059
1060
1061 if (offset) {
1062 if (dictMode == ZSTD_noDict) {
1063 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
1064 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )
1065 { start--; matchLength++; }
1066 }
1067 if (isDxS) {
1068 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1069 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
1070 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
1071 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }
1072 }
1073 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1074 }
1075
1076 _storeSequence:
1077 { size_t const litLength = start - anchor;
1078 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1079 anchor = ip = start + matchLength;
1080 }
1081
1082
1083 if (isDxS) {
1084 while (ip <= ilimit) {
1085 U32 const current2 = (U32)(ip-base);
1086 U32 const repIndex = current2 - offset_2;
1087 const BYTE* repMatch = repIndex < prefixLowestIndex ?
1088 dictBase - dictIndexDelta + repIndex :
1089 base + repIndex;
1090 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 )
1091 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1092 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
1093 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
1094 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;
1095 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1096 ip += matchLength;
1097 anchor = ip;
1098 continue;
1099 }
1100 break;
1101 }
1102 }
1103
1104 if (dictMode == ZSTD_noDict) {
1105 while ( ((ip <= ilimit) & (offset_2>0))
1106 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
1107
1108 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1109 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;
1110 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1111 ip += matchLength;
1112 anchor = ip;
1113 continue;
1114 } } }
1115
1116
1117 rep[0] = offset_1 ? offset_1 : savedOffset;
1118 rep[1] = offset_2 ? offset_2 : savedOffset;
1119
1120
1121 return (size_t)(iend - anchor);
1122 }
1123
1124
1125 size_t ZSTD_compressBlock_btlazy2(
1126 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1127 void const* src, size_t srcSize)
1128 {
1129 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
1130 }
1131
1132 size_t ZSTD_compressBlock_lazy2(
1133 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1134 void const* src, size_t srcSize)
1135 {
1136 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
1137 }
1138
1139 size_t ZSTD_compressBlock_lazy(
1140 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1141 void const* src, size_t srcSize)
1142 {
1143 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
1144 }
1145
1146 size_t ZSTD_compressBlock_greedy(
1147 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1148 void const* src, size_t srcSize)
1149 {
1150 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
1151 }
1152
1153 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
1154 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1155 void const* src, size_t srcSize)
1156 {
1157 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
1158 }
1159
1160 size_t ZSTD_compressBlock_lazy2_dictMatchState(
1161 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1162 void const* src, size_t srcSize)
1163 {
1164 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
1165 }
1166
1167 size_t ZSTD_compressBlock_lazy_dictMatchState(
1168 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1169 void const* src, size_t srcSize)
1170 {
1171 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
1172 }
1173
1174 size_t ZSTD_compressBlock_greedy_dictMatchState(
1175 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1176 void const* src, size_t srcSize)
1177 {
1178 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1179 }
1180
1181
1182 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1183 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1184 void const* src, size_t srcSize)
1185 {
1186 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1187 }
1188
1189 size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1190 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1191 void const* src, size_t srcSize)
1192 {
1193 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1194 }
1195
1196 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1197 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1198 void const* src, size_t srcSize)
1199 {
1200 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
1201 }
1202
1203
1204 FORCE_INLINE_TEMPLATE
1205 size_t ZSTD_compressBlock_lazy_extDict_generic(
1206 ZSTD_matchState_t* ms, seqStore_t* seqStore,
1207 U32 rep[ZSTD_REP_NUM],
1208 const void* src, size_t srcSize,
1209 const searchMethod_e searchMethod, const U32 depth)
1210 {
1211 const BYTE* const istart = (const BYTE*)src;
1212 const BYTE* ip = istart;
1213 const BYTE* anchor = istart;
1214 const BYTE* const iend = istart + srcSize;
1215 const BYTE* const ilimit = iend - 8;
1216 const BYTE* const base = ms->window.base;
1217 const U32 dictLimit = ms->window.dictLimit;
1218 const BYTE* const prefixStart = base + dictLimit;
1219 const BYTE* const dictBase = ms->window.dictBase;
1220 const BYTE* const dictEnd = dictBase + dictLimit;
1221 const BYTE* const dictStart = dictBase + ms->window.lowLimit;
1222 const U32 windowLog = ms->cParams.windowLog;
1223
1224 typedef size_t (*searchMax_f)(
1225 ZSTD_matchState_t* ms,
1226 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
1227 searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
1228
1229 U32 offset_1 = rep[0], offset_2 = rep[1];
1230
1231 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
1232
1233
1234 ip += (ip == prefixStart);
1235
1236
1237 #if defined(__x86_64__)
1238
1239
1240
1241 __asm__(".p2align 5");
1242 #endif
1243 while (ip < ilimit) {
1244 size_t matchLength=0;
1245 size_t offset=0;
1246 const BYTE* start=ip+1;
1247 U32 curr = (U32)(ip-base);
1248
1249
1250 { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
1251 const U32 repIndex = (U32)(curr+1 - offset_1);
1252 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1253 const BYTE* const repMatch = repBase + repIndex;
1254 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))
1255 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
1256
1257 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1258 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1259 if (depth==0) goto _storeSequence;
1260 } }
1261
1262
1263 { size_t offsetFound = 999999999;
1264 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
1265 if (ml2 > matchLength)
1266 matchLength = ml2, start = ip, offset=offsetFound;
1267 }
1268
1269 if (matchLength < 4) {
1270 ip += ((ip-anchor) >> kSearchStrength) + 1;
1271 continue;
1272 }
1273
1274
1275 if (depth>=1)
1276 while (ip<ilimit) {
1277 ip ++;
1278 curr++;
1279
1280 if (offset) {
1281 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1282 const U32 repIndex = (U32)(curr - offset_1);
1283 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1284 const BYTE* const repMatch = repBase + repIndex;
1285 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))
1286 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1287
1288 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1289 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1290 int const gain2 = (int)(repLength * 3);
1291 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1292 if ((repLength >= 4) && (gain2 > gain1))
1293 matchLength = repLength, offset = 0, start = ip;
1294 } }
1295
1296
1297 { size_t offset2=999999999;
1298 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1299 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));
1300 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1301 if ((ml2 >= 4) && (gain2 > gain1)) {
1302 matchLength = ml2, offset = offset2, start = ip;
1303 continue;
1304 } }
1305
1306
1307 if ((depth==2) && (ip<ilimit)) {
1308 ip ++;
1309 curr++;
1310
1311 if (offset) {
1312 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1313 const U32 repIndex = (U32)(curr - offset_1);
1314 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1315 const BYTE* const repMatch = repBase + repIndex;
1316 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))
1317 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1318
1319 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1320 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1321 int const gain2 = (int)(repLength * 4);
1322 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1323 if ((repLength >= 4) && (gain2 > gain1))
1324 matchLength = repLength, offset = 0, start = ip;
1325 } }
1326
1327
1328 { size_t offset2=999999999;
1329 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1330 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));
1331 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1332 if ((ml2 >= 4) && (gain2 > gain1)) {
1333 matchLength = ml2, offset = offset2, start = ip;
1334 continue;
1335 } } }
1336 break;
1337 }
1338
1339
1340 if (offset) {
1341 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1342 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
1343 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
1344 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }
1345 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1346 }
1347
1348
1349 _storeSequence:
1350 { size_t const litLength = start - anchor;
1351 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1352 anchor = ip = start + matchLength;
1353 }
1354
1355
1356 while (ip <= ilimit) {
1357 const U32 repCurrent = (U32)(ip-base);
1358 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1359 const U32 repIndex = repCurrent - offset_2;
1360 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1361 const BYTE* const repMatch = repBase + repIndex;
1362 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))
1363 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1364
1365 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1366 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1367 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;
1368 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1369 ip += matchLength;
1370 anchor = ip;
1371 continue;
1372 }
1373 break;
1374 } }
1375
1376
1377 rep[0] = offset_1;
1378 rep[1] = offset_2;
1379
1380
1381 return (size_t)(iend - anchor);
1382 }
1383
1384
1385 size_t ZSTD_compressBlock_greedy_extDict(
1386 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1387 void const* src, size_t srcSize)
1388 {
1389 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1390 }
1391
1392 size_t ZSTD_compressBlock_lazy_extDict(
1393 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1394 void const* src, size_t srcSize)
1395
1396 {
1397 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1398 }
1399
1400 size_t ZSTD_compressBlock_lazy2_extDict(
1401 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1402 void const* src, size_t srcSize)
1403
1404 {
1405 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1406 }
1407
1408 size_t ZSTD_compressBlock_btlazy2_extDict(
1409 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1410 void const* src, size_t srcSize)
1411
1412 {
1413 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1414 }