diff options
Diffstat (limited to 'lib/zstd/compress/zstd_compress_internal.h')
-rw-r--r-- | lib/zstd/compress/zstd_compress_internal.h | 621 |
1 files changed, 425 insertions, 196 deletions
diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h index 71697a11ae30..b10978385876 100644 --- a/lib/zstd/compress/zstd_compress_internal.h +++ b/lib/zstd/compress/zstd_compress_internal.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -20,7 +21,8 @@ ***************************************/ #include "../common/zstd_internal.h" #include "zstd_cwksp.h" - +#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ +#include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */ /*-************************************* * Constants @@ -32,7 +34,7 @@ It's not a big deal though : candidate will just be sorted again. Additionally, candidate position 1 will be lost. But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ @@ -76,6 +78,70 @@ typedef struct { } ZSTD_entropyCTables_t; /* ********************************************* +* Sequences * +***********************************************/ +typedef struct SeqDef_s { + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ + U16 litLength; + U16 mlBase; /* mlBase == matchLength - MINMATCH */ +} SeqDef; + +/* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + +typedef struct { + SeqDef* sequencesStart; + SeqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} SeqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_SequenceLength; + +/* + * Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq) +{ + ZSTD_SequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->mlBase + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0x10000; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0x10000; + } + } + return seqLen; +} + +const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + + +/* ********************************************* * Entropy buffer statistics structs and funcs * ***********************************************/ /* ZSTD_hufCTablesMetadata_t : @@ -84,7 +150,7 @@ typedef struct { * hufDesSize refers to the size of huffman tree description in bytes. * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ typedef struct { - symbolEncodingType_e hType; + SymbolEncodingType_e hType; BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; size_t hufDesSize; } ZSTD_hufCTablesMetadata_t; @@ -95,9 +161,9 @@ typedef struct { * fseTablesSize refers to the size of fse tables in bytes. * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; + SymbolEncodingType_e llType; + SymbolEncodingType_e ofType; + SymbolEncodingType_e mlType; BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; size_t fseTablesSize; size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ @@ -111,12 +177,13 @@ typedef struct { /* ZSTD_buildBlockEntropyStats() : * Builds entropy for the block. * @return : 0 on success or error code */ -size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize); +size_t ZSTD_buildBlockEntropyStats( + const SeqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); /* ******************************* * Compression internals structs * @@ -140,28 +207,29 @@ typedef struct { stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ size_t size; /* The number of sequences. <= capacity. */ size_t capacity; /* The capacity starting from `seq` pointer */ -} rawSeqStore_t; +} RawSeqStore_t; -UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; +UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { - int price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; + int price; /* price from beginning of segment to this position */ + U32 off; /* offset of previous match */ + U32 mlen; /* length of previous match */ + U32 litlen; /* nb of literals since previous match */ + U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ } ZSTD_optimal_t; typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; +#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3) typedef struct { /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ unsigned* litFreq; /* table of literals statistics, of size 256 */ unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ U32 litSum; /* nb of literals */ U32 litLengthSum; /* nb of litLength codes */ @@ -173,7 +241,7 @@ typedef struct { U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_paramSwitch_e literalCompressionMode; + ZSTD_ParamSwitch_e literalCompressionMode; } optState_t; typedef struct { @@ -195,11 +263,11 @@ typedef struct { #define ZSTD_WINDOW_START_INDEX 2 -typedef struct ZSTD_matchState_t ZSTD_matchState_t; +typedef struct ZSTD_MatchState_t ZSTD_MatchState_t; #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ -struct ZSTD_matchState_t { +struct ZSTD_MatchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary, within context's referential. * When loadedDictEnd != 0, a dictionary is in use, and still valid. @@ -212,28 +280,42 @@ struct ZSTD_matchState_t { U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ - U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */ + U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ U32* hashTable; U32* hashTable3; U32* chainTable; - U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ int dedicatedDictSearch; /* Indicates whether this matchState is using the * dedicated dictionary search structure. */ optState_t opt; /* optimal parser state */ - const ZSTD_matchState_t* dictMatchState; + const ZSTD_MatchState_t* dictMatchState; ZSTD_compressionParameters cParams; - const rawSeqStore_t* ldmSeqStore; + const RawSeqStore_t* ldmSeqStore; + + /* Controls prefetching in some dictMatchState matchfinders. + * This behavior is controlled from the cctx ms. + * This parameter has no effect in the cdict ms. */ + int prefetchCDictTables; + + /* When == 0, lazy match finders insert every position. + * When != 0, lazy match finders only insert positions they search. + * This allows them to skip much faster over incompressible data, + * at a small cost to compression ratio. + */ + int lazySkipping; }; typedef struct { ZSTD_compressedBlockState_t* prevCBlock; ZSTD_compressedBlockState_t* nextCBlock; - ZSTD_matchState_t matchState; + ZSTD_MatchState_t matchState; } ZSTD_blockState_t; typedef struct { @@ -260,7 +342,7 @@ typedef struct { } ldmState_t; typedef struct { - ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ + ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ U32 hashLog; /* Log size of hashTable */ U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ U32 minMatchLength; /* Minimum match length */ @@ -291,7 +373,7 @@ struct ZSTD_CCtx_params_s { * There is no guarantee that hint is close to actual source size */ ZSTD_dictAttachPref_e attachDictPref; - ZSTD_paramSwitch_e literalCompressionMode; + ZSTD_ParamSwitch_e literalCompressionMode; /* Multithreading: used to pass parameters to mtctx */ int nbWorkers; @@ -310,24 +392,54 @@ struct ZSTD_CCtx_params_s { ZSTD_bufferMode_e outBufferMode; /* Sequence compression API */ - ZSTD_sequenceFormat_e blockDelimiters; + ZSTD_SequenceFormat_e blockDelimiters; int validateSequences; - /* Block splitting */ - ZSTD_paramSwitch_e useBlockSplitter; + /* Block splitting + * @postBlockSplitter executes split analysis after sequences are produced, + * it's more accurate but consumes more resources. + * @preBlockSplitter_level splits before knowing sequences, + * it's more approximative but also cheaper. + * Valid @preBlockSplitter_level values range from 0 to 6 (included). + * 0 means auto, 1 means do not split, + * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest). + * Highest @preBlockSplitter_level combines well with @postBlockSplitter. + */ + ZSTD_ParamSwitch_e postBlockSplitter; + int preBlockSplitter_level; + + /* Adjust the max block size*/ + size_t maxBlockSize; /* Param for deciding whether to use row-based matchfinder */ - ZSTD_paramSwitch_e useRowMatchFinder; + ZSTD_ParamSwitch_e useRowMatchFinder; /* Always load a dictionary in ext-dict mode (not prefix mode)? */ int deterministicRefPrefix; /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; + + /* Controls prefetching in some dictMatchState matchfinders */ + ZSTD_ParamSwitch_e prefetchCDictTables; + + /* Controls whether zstd will fall back to an internal matchfinder + * if the external matchfinder returns an error code. */ + int enableMatchFinderFallback; + + /* Parameters for the external sequence producer API. + * Users set these parameters through ZSTD_registerSequenceProducer(). + * It is not possible to set these parameters individually through the public API. */ + void* extSeqProdState; + ZSTD_sequenceProducer_F extSeqProdFunc; + + /* Controls repcode search in external sequence parsing */ + ZSTD_ParamSwitch_e searchForExternalRepcodes; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE) +#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE)) /* * Indicates whether this compression proceeds directly from user-provided @@ -345,11 +457,11 @@ typedef enum { */ #define ZSTD_MAX_NB_BLOCK_SPLITS 196 typedef struct { - seqStore_t fullSeqStoreChunk; - seqStore_t firstHalfSeqStore; - seqStore_t secondHalfSeqStore; - seqStore_t currSeqStore; - seqStore_t nextSeqStore; + SeqStore_t fullSeqStoreChunk; + SeqStore_t firstHalfSeqStore; + SeqStore_t secondHalfSeqStore; + SeqStore_t currSeqStore; + SeqStore_t nextSeqStore; U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; ZSTD_entropyCTablesMetadata_t entropyMetadata; @@ -366,7 +478,7 @@ struct ZSTD_CCtx_s { size_t dictContentSize; ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ - size_t blockSize; + size_t blockSizeMax; unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ unsigned long long consumedSrcSize; unsigned long long producedCSize; @@ -378,13 +490,14 @@ struct ZSTD_CCtx_s { int isFirstBlock; int initialized; - seqStore_t seqStore; /* sequences storage ptrs */ + SeqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ rawSeq* ldmSequences; /* Storage for the ldm output sequences */ size_t maxNbLdmSequences; - rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ + RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ ZSTD_blockState_t blockState; - U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ + void* tmpWorkspace; /* used as substitute of stack space - must be aligned for S64 type */ + size_t tmpWkspSize; /* Whether we are streaming or not */ ZSTD_buffered_policy_e bufferedPolicy; @@ -404,6 +517,7 @@ struct ZSTD_CCtx_s { /* Stable in/out buffer verification */ ZSTD_inBuffer expectedInBuffer; + size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */ size_t expectedOutBufferSize; /* Dictionary */ @@ -417,9 +531,14 @@ struct ZSTD_CCtx_s { /* Workspace for block splitter */ ZSTD_blockSplitCtx blockSplitCtx; + + /* Buffer for output from external sequence producer */ + ZSTD_Sequence* extSeqBuf; + size_t extSeqBufCapacity; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; +typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e; typedef enum { ZSTD_noDict = 0, @@ -441,17 +560,17 @@ typedef enum { * In this mode we take both the source size and the dictionary size * into account when selecting and adjusting the parameters. */ - ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. + ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. * We don't know what these parameters are for. We default to the legacy * behavior of taking both the source size and the dict size into account * when selecting and adjusting parameters. */ -} ZSTD_cParamMode_e; +} ZSTD_CParamMode_e; -typedef size_t (*ZSTD_blockCompressor) ( - ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], +typedef size_t (*ZSTD_BlockCompressor_f) ( + ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); +ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -497,12 +616,33 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) return 1; } +/* ZSTD_selectAddr: + * @return index >= lowLimit ? candidate : backup, + * tries to force branchless codegen. */ +MEM_STATIC const BYTE* +ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup) +{ +#if defined(__x86_64__) + __asm__ ( + "cmp %1, %2\n" + "cmova %3, %0\n" + : "+r"(candidate) + : "r"(index), "r"(lowLimit), "r"(backup) + ); + return candidate; +#else + return index >= lowLimit ? candidate : backup; +#endif +} + /* ZSTD_noCompressBlock() : * Writes uncompressed block to dst buffer from given src. * Returns the size of the block */ -MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +MEM_STATIC size_t +ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) { U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity); RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, dstSize_tooSmall, "dst buf too small for uncompressed block"); MEM_writeLE24(dst, cBlockHeader24); @@ -510,7 +650,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi return ZSTD_blockHeaderSize + srcSize; } -MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +MEM_STATIC size_t +ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) { BYTE* const op = (BYTE*)dst; U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); @@ -529,7 +670,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) { U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); return (srcSize >> minlog) + 2; } @@ -565,29 +706,68 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con while (ip < iend) *op++ = *ip++; } -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) -#define STORE_REPCODE_1 STORE_REPCODE(1) -#define STORE_REPCODE_2 STORE_REPCODE(2) -#define STORE_REPCODE_3 STORE_REPCODE(3) -#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) -#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) -#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) -#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) -#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) -#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ -#define STORED_TO_OFFBASE(o) ((o)+1) -#define OFFBASE_TO_STORED(o) ((o)-1) + +#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1) +#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2) +#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3) +#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */ +#define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM) +#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM) +#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM) +#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM) +#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */ + +/*! ZSTD_storeSeqOnly() : + * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. + * Literals themselves are not copied, but @litPtr is updated. + * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). + * @matchLength : must be >= MINMATCH +*/ +HINT_INLINE UNUSED_ATTR void +ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr, + size_t litLength, + U32 offBase, + size_t matchLength) +{ + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + + /* literal Length */ + assert(litLength <= ZSTD_BLOCKSIZE_MAX); + if (UNLIKELY(litLength>0xFFFF)) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offBase = offBase; + + /* match Length */ + assert(matchLength <= ZSTD_BLOCKSIZE_MAX); + assert(matchLength >= MINMATCH); + { size_t const mlBase = matchLength - MINMATCH; + if (UNLIKELY(mlBase>0xFFFF)) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].mlBase = (U16)mlBase; + } + + seqStorePtr->sequences++; +} /*! ZSTD_storeSeq() : - * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. - * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). + * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. + * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). * @matchLength : must be >= MINMATCH - * Allowed to overread literals up to litLimit. + * Allowed to over-read literals up to litLimit. */ HINT_INLINE UNUSED_ATTR void -ZSTD_storeSeq(seqStore_t* seqStorePtr, +ZSTD_storeSeq(SeqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, - U32 offBase_minus1, + U32 offBase, size_t matchLength) { BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; @@ -596,8 +776,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, static const BYTE* g_start = NULL; if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); - DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u", + pos, (U32)litLength, (U32)matchLength, (U32)offBase); } #endif assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); @@ -607,9 +787,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, assert(literals + litLength <= litLimit); if (litEnd <= litLimit_w) { /* Common case we can use wildcopy. - * First copy 16 bytes, because literals are likely short. - */ - assert(WILDCOPY_OVERLENGTH >= 16); + * First copy 16 bytes, because literals are likely short. + */ + ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); ZSTD_copy16(seqStorePtr->lit, literals); if (litLength > 16) { ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); @@ -619,44 +799,22 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, } seqStorePtr->lit += litLength; - /* literal Length */ - if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ - seqStorePtr->longLengthType = ZSTD_llt_literalLength; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ - seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); - - /* match Length */ - assert(matchLength >= MINMATCH); - { size_t const mlBase = matchLength - MINMATCH; - if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ - seqStorePtr->longLengthType = ZSTD_llt_matchLength; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].mlBase = (U16)mlBase; - } - - seqStorePtr->sequences++; + ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength); } /* ZSTD_updateRep() : * updates in-place @rep (array of repeat offsets) - * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() + * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() */ MEM_STATIC void -ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) { - if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ + if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = STORED_OFFSET(offBase_minus1); + rep[0] = OFFBASE_TO_OFFSET(offBase); } else { /* repcode */ - U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; + U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; if (repCode > 0) { /* note : if repCode==0, no change */ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; rep[2] = (repCode >= 2) ? rep[1] : rep[2]; @@ -670,14 +828,14 @@ ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) typedef struct repcodes_s { U32 rep[3]; -} repcodes_t; +} Repcodes_t; -MEM_STATIC repcodes_t -ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +MEM_STATIC Repcodes_t +ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) { - repcodes_t newReps; + Repcodes_t newReps; ZSTD_memcpy(&newReps, rep, sizeof(newReps)); - ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); + ZSTD_updateRep(newReps.rep, offBase, ll0); return newReps; } @@ -685,59 +843,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0 /*-************************************* * Match length counter ***************************************/ -static unsigned ZSTD_NbCommonBytes (size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if (__GNUC__ >= 4) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if (__GNUC__ >= 4) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) { const BYTE* const pStart = pIn; @@ -771,8 +876,8 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match, size_t const matchLength = ZSTD_count(ip, match, vEnd); if (match + matchLength != mEnd) return matchLength; DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); - DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match); - DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip); + DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match)); + DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip)); DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); @@ -783,32 +888,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match, * Hashes ***************************************/ static const U32 prime3bytes = 506832829U; -static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } -MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ +static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */ +MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); } static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } +static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); } +static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); } static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } +static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); } +static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); } static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } +static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); } +static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); } static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } +static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); } +static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); } static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } +static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); } +static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); } + MEM_STATIC FORCE_INLINE_ATTR size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) { + /* Although some of these hashes do support hBits up to 64, some do not. + * To be on the safe side, always avoid hBits > 32. */ + assert(hBits <= 32); + switch(mls) { default: @@ -820,6 +936,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) } } +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { + /* Although some of these hashes do support hBits up to 64, some do not. + * To be on the safe side, always avoid hBits > 32. */ + assert(hBits <= 32); + + switch(mls) + { + default: + case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); + case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); + case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); + case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); + case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); + } +} + + /* ZSTD_ipow() : * Return base^exponent. */ @@ -881,11 +1015,12 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 /*-************************************* * Round buffer management ***************************************/ -#if (ZSTD_WINDOWLOG_MAX_64 > 31) -# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" -#endif -/* Max current allowed */ -#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Max @current value allowed: + * In 32-bit mode: we want to avoid crossing the 2 GB limit, + * reducing risks of side effects in case of signed operations on indexes. + * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB) + * doesn't overflow U32 index capacity (4 GB) */ +#define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB) /* Maximum chunk size before overflow correction needs to be called again */ #define ZSTD_CHUNKSIZE_MAX \ ( ((U32)-1) /* Maximum ending current index */ \ @@ -925,7 +1060,7 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) * Inspects the provided matchState and figures out what dictMode should be * passed to the compressor. */ -MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms) { return ZSTD_window_hasExtDict(ms->window) ? ZSTD_extDict : @@ -1011,7 +1146,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, * The least significant cycleLog bits of the indices must remain the same, * which may be 0. Every index up to maxDist in the past must be valid. */ -MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, +MEM_STATIC +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, U32 maxDist, void const* src) { /* preemptive overflow correction: @@ -1112,7 +1249,7 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window, const void* blockEnd, U32 maxDist, U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) + const ZSTD_MatchState_t** dictMatchStatePtr) { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; @@ -1157,7 +1294,7 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, const void* blockEnd, U32 maxDist, U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) + const ZSTD_MatchState_t** dictMatchStatePtr) { assert(loadedDictEndPtr != NULL); assert(dictMatchStatePtr != NULL); @@ -1167,10 +1304,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); assert(blockEndIdx >= loadedDictEnd); - if (blockEndIdx > loadedDictEnd + maxDist) { + if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) { /* On reaching window size, dictionaries are invalidated. * For simplification, if window size is reached anywhere within next block, * the dictionary is invalidated for the full block. + * + * We also have to invalidate the dictionary if ZSTD_window_update() has detected + * non-contiguous segments, which means that loadedDictEnd != window->dictLimit. + * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use + * dictMatchState, so setting it to NULL is not a problem. */ DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); *loadedDictEndPtr = 0; @@ -1199,9 +1341,11 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { * forget about the extDict. Handles overlap of the prefix and extDict. * Returns non-zero if the segment is contiguous. */ -MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize, - int forceNonContiguous) +MEM_STATIC +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_window_update(ZSTD_window_t* window, + const void* src, size_t srcSize, + int forceNonContiguous) { BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; @@ -1228,8 +1372,9 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ if ( (ip+srcSize > window->dictBase + window->lowLimit) & (ip < window->dictBase + window->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase); + U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + assert(highInputIdx < UINT_MAX); window->lowLimit = lowLimitMax; DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); } @@ -1239,7 +1384,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, /* * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. */ -MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) { U32 const maxDistance = 1U << windowLog; U32 const lowestValid = ms->window.lowLimit; @@ -1256,7 +1401,7 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, u /* * Returns the lowest allowed match index in the prefix. */ -MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) { U32 const maxDistance = 1U << windowLog; U32 const lowestValid = ms->window.dictLimit; @@ -1269,6 +1414,13 @@ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, return matchLowest; } +/* index_safety_check: + * intentional underflow : ensure repIndex isn't overlapping dict + prefix + * @return 1 if values are not overlapping, + * 0 otherwise */ +MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) { + return ((U32)((prefixLowestIndex-1) - repIndex) >= 3); +} /* debug functions */ @@ -1302,7 +1454,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) #endif +/* Short Cache */ + +/* Normally, zstd matchfinders follow this flow: + * 1. Compute hash at ip + * 2. Load index from hashTable[hash] + * 3. Check if *ip == *(base + index) + * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss. + * + * Short cache is an optimization which allows us to avoid step 3 most of the time + * when the data doesn't actually match. With short cache, the flow becomes: + * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip. + * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works. + * 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue. + * + * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to + * dictMatchState matchfinders. + */ +#define ZSTD_SHORT_CACHE_TAG_BITS 8 +#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) + +/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. + * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ +MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { + size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; + U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); + assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); + hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; +} +/* Helper function for short cache matchfinders. + * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */ +MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) { + U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK; + U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK; + return tag1 == tag2; +} /* =============================================================== * Shared internal declarations @@ -1319,6 +1506,25 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_SequencePosition; + +/* for benchmark */ +size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, + const ZSTD_Sequence* const inSeqs, size_t nbSequences, + int const repcodeResolution); + +typedef struct { + size_t nbSequences; + size_t blockSize; + size_t litSize; +} BlockSummary; + +BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs); + /* ============================================================== * Private declarations * These prototypes shall only be called from within lib/compress @@ -1330,7 +1536,7 @@ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); * Note: srcSizeHint == 0 means 0! */ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); /*! ZSTD_initCStream_internal() : * Private use only. Init streaming operation. @@ -1342,7 +1548,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); -void ZSTD_resetSeqStore(seqStore_t* ssPtr); +void ZSTD_resetSeqStore(SeqStore_t* ssPtr); /*! ZSTD_getCParamsFromCDict() : * as the name implies */ @@ -1381,11 +1587,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); * This cannot be used when long range matching is enabled. * Zstd will use these sequences, and pass the literals to a secondary block * compressor. - * @return : An error code on failure. * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory * access and data corruption. */ -size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); +void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); /* ZSTD_cycleLog() : * condition for correct operation : hashLog > 1 */ @@ -1396,4 +1601,28 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); */ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); +/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */ +MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) { + return params->extSeqProdFunc != NULL; +} + +/* =============================================================== + * Deprecated definitions that are still used internally to avoid + * deprecation warnings. These functions are exactly equivalent to + * their public variants, but avoid the deprecation warnings. + * =============================================================== */ + +size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + #endif /* ZSTD_COMPRESS_H */ |