mirror of
https://github.com/Kitware/CMake.git
synced 2026-05-07 06:40:16 -05:00
Merge branch 'upstream-zstd' into update-zstd
* upstream-zstd: zstd 2023-04-04 (63779c79)
This commit is contained in:
@@ -2,7 +2,7 @@ BSD License
|
||||
|
||||
For Zstandard software
|
||||
|
||||
Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
|
||||
Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
@@ -14,9 +14,9 @@ are permitted provided that the following conditions are met:
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name Facebook nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this software without specific
|
||||
prior written permission.
|
||||
* Neither the name Facebook, nor Meta, nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
|
||||
+55
-31
@@ -4,23 +4,21 @@ __Zstandard__, or `zstd` as short version, is a fast lossless compression algori
|
||||
targeting real-time compression scenarios at zlib-level and better compression ratios.
|
||||
It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy).
|
||||
|
||||
The project is provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
|
||||
Zstandard's format is stable and documented in [RFC8878](https://datatracker.ietf.org/doc/html/rfc8878). Multiple independent implementations are already available.
|
||||
This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
|
||||
and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files.
|
||||
Should your project require another programming language,
|
||||
a list of known ports and bindings is provided on [Zstandard homepage](http://www.zstd.net/#other-languages).
|
||||
a list of known ports and bindings is provided on [Zstandard homepage](https://facebook.github.io/zstd/#other-languages).
|
||||
|
||||
**Development branch status:**
|
||||
|
||||
[![Build Status][travisDevBadge]][travisLink]
|
||||
[![Build status][AppveyorDevBadge]][AppveyorLink]
|
||||
[![Build status][CircleDevBadge]][CircleLink]
|
||||
[![Build status][CirrusDevBadge]][CirrusLink]
|
||||
[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
|
||||
|
||||
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
|
||||
[travisLink]: https://travis-ci.org/facebook/zstd
|
||||
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/xt38wbdxjk5mrbem/branch/dev?svg=true "Windows test suite"
|
||||
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
|
||||
[travisDevBadge]: https://api.travis-ci.com/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
|
||||
[travisLink]: https://travis-ci.com/facebook/zstd
|
||||
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
|
||||
[CircleLink]: https://circleci.com/gh/facebook/zstd
|
||||
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
|
||||
@@ -31,37 +29,36 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
|
||||
## Benchmarks
|
||||
|
||||
For reference, several fast compression algorithms were tested and compared
|
||||
on a server running Arch Linux (`Linux version 5.5.11-arch1-1`),
|
||||
with a Core i9-9900K CPU @ 5.0GHz,
|
||||
on a desktop running Ubuntu 20.04 (`Linux 5.11.0-41-generic`),
|
||||
with a Core i7-9700K CPU @ 4.9GHz,
|
||||
using [lzbench], an open-source in-memory benchmark by @inikep
|
||||
compiled with [gcc] 9.3.0,
|
||||
on the [Silesia compression corpus].
|
||||
|
||||
[lzbench]: https://github.com/inikep/lzbench
|
||||
[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
|
||||
[Silesia compression corpus]: https://sun.aei.polsl.pl//~sdeor/index.php?page=silesia
|
||||
[gcc]: https://gcc.gnu.org/
|
||||
|
||||
| Compressor name | Ratio | Compression| Decompress.|
|
||||
| --------------- | ------| -----------| ---------- |
|
||||
| **zstd 1.4.5 -1** | 2.884 | 500 MB/s | 1660 MB/s |
|
||||
| zlib 1.2.11 -1 | 2.743 | 90 MB/s | 400 MB/s |
|
||||
| brotli 1.0.7 -0 | 2.703 | 400 MB/s | 450 MB/s |
|
||||
| **zstd 1.4.5 --fast=1** | 2.434 | 570 MB/s | 2200 MB/s |
|
||||
| **zstd 1.4.5 --fast=3** | 2.312 | 640 MB/s | 2300 MB/s |
|
||||
| quicklz 1.5.0 -1 | 2.238 | 560 MB/s | 710 MB/s |
|
||||
| **zstd 1.4.5 --fast=5** | 2.178 | 700 MB/s | 2420 MB/s |
|
||||
| lzo1x 2.10 -1 | 2.106 | 690 MB/s | 820 MB/s |
|
||||
| lz4 1.9.2 | 2.101 | 740 MB/s | 4530 MB/s |
|
||||
| **zstd 1.4.5 --fast=7** | 2.096 | 750 MB/s | 2480 MB/s |
|
||||
| lzf 3.6 -1 | 2.077 | 410 MB/s | 860 MB/s |
|
||||
| snappy 1.1.8 | 2.073 | 560 MB/s | 1790 MB/s |
|
||||
| **zstd 1.5.1 -1** | 2.887 | 530 MB/s | 1700 MB/s |
|
||||
| [zlib] 1.2.11 -1 | 2.743 | 95 MB/s | 400 MB/s |
|
||||
| brotli 1.0.9 -0 | 2.702 | 395 MB/s | 450 MB/s |
|
||||
| **zstd 1.5.1 --fast=1** | 2.437 | 600 MB/s | 2150 MB/s |
|
||||
| **zstd 1.5.1 --fast=3** | 2.239 | 670 MB/s | 2250 MB/s |
|
||||
| quicklz 1.5.0 -1 | 2.238 | 540 MB/s | 760 MB/s |
|
||||
| **zstd 1.5.1 --fast=4** | 2.148 | 710 MB/s | 2300 MB/s |
|
||||
| lzo1x 2.10 -1 | 2.106 | 660 MB/s | 845 MB/s |
|
||||
| [lz4] 1.9.3 | 2.101 | 740 MB/s | 4500 MB/s |
|
||||
| lzf 3.6 -1 | 2.077 | 410 MB/s | 830 MB/s |
|
||||
| snappy 1.1.9 | 2.073 | 550 MB/s | 1750 MB/s |
|
||||
|
||||
[zlib]: http://www.zlib.net/
|
||||
[LZ4]: http://www.lz4.org/
|
||||
[zlib]: https://www.zlib.net/
|
||||
[lz4]: https://lz4.github.io/lz4/
|
||||
|
||||
The negative compression levels, specified with `--fast=#`,
|
||||
offer faster compression and decompression speed in exchange for some loss in
|
||||
compression ratio compared to level 1, as seen in the table above.
|
||||
offer faster compression and decompression speed
|
||||
at the cost of compression ratio (compared to level 1).
|
||||
|
||||
Zstd can also offer stronger compression ratios at the cost of compression speed.
|
||||
Speed vs Compression trade-off is configurable by small increments.
|
||||
@@ -124,14 +121,27 @@ Dictionary gains are mostly effective in the first few KB. Then, the compression
|
||||
|
||||
## Build instructions
|
||||
|
||||
`make` is the officially maintained build system of this project.
|
||||
All other build systems are "compatible" and 3rd-party maintained,
|
||||
they may feature small differences in advanced options.
|
||||
When your system allows it, prefer using `make` to build `zstd` and `libzstd`.
|
||||
|
||||
### Makefile
|
||||
|
||||
If your system is compatible with standard `make` (or `gmake`),
|
||||
invoking `make` in root directory will generate `zstd` cli in root directory.
|
||||
It will also create `libzstd` into `lib/`.
|
||||
|
||||
Other available options include:
|
||||
- `make install` : create and install zstd cli, library and man pages
|
||||
- `make check` : create and run `zstd`, tests its behavior on local platform
|
||||
- `make check` : create and run `zstd`, test its behavior on local platform
|
||||
|
||||
The `Makefile` follows the [GNU Standard Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html),
|
||||
allowing staged install, standard flags, directory variables and command variables.
|
||||
|
||||
For advanced use cases, specialized compilation flags which control binary generation
|
||||
are documented in [`lib/README.md`](lib/README.md#modular-build) for the `libzstd` library
|
||||
and in [`programs/README.md`](programs/README.md#compilation-variables) for the `zstd` CLI.
|
||||
|
||||
### cmake
|
||||
|
||||
@@ -141,6 +151,18 @@ to create `zstd` binary, and `libzstd` dynamic and static libraries.
|
||||
|
||||
By default, `CMAKE_BUILD_TYPE` is set to `Release`.
|
||||
|
||||
#### Support for Fat (Universal2) Output
|
||||
|
||||
`zstd` can be built and installed with support for both Apple Silicon (M1/M2) as well as Intel by using CMake's Universal2 support.
|
||||
To perform a Fat/Universal2 build and install use the following commands:
|
||||
|
||||
```bash
|
||||
cmake -B build-cmake-debug -S build/cmake -G Ninja -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h;arm64"
|
||||
cd build-cmake-debug
|
||||
ninja
|
||||
sudo ninja install
|
||||
```
|
||||
|
||||
### Meson
|
||||
|
||||
A Meson project is provided within [`build/meson`](build/meson). Follow
|
||||
@@ -178,13 +200,15 @@ The output binary will be in `buck-out/gen/programs/`.
|
||||
|
||||
## Testing
|
||||
|
||||
You can run quick local smoke tests by executing the `playTest.sh` script from the `src/tests` directory.
|
||||
Two env variables `$ZSTD_BIN` and `$DATAGEN_BIN` are needed for the test script to locate the zstd and datagen binary.
|
||||
For information on CI testing, please refer to TESTING.md
|
||||
You can run quick local smoke tests by running `make check`.
|
||||
If you can't use `make`, execute the `playTest.sh` script from the `src/tests` directory.
|
||||
Two env variables `$ZSTD_BIN` and `$DATAGEN_BIN` are needed for the test script to locate the `zstd` and `datagen` binary.
|
||||
For information on CI testing, please refer to `TESTING.md`.
|
||||
|
||||
## Status
|
||||
|
||||
Zstandard is currently deployed within Facebook. It is used continuously to compress large amounts of data in multiple formats and use cases.
|
||||
Zstandard is currently deployed within Facebook and many other large cloud infrastructures.
|
||||
It is run continuously to compress large amounts of data in multiple formats and use cases.
|
||||
Zstandard is considered safe for production environments.
|
||||
|
||||
## License
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* This file provides custom allocation primitives
|
||||
*/
|
||||
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
|
||||
|
||||
#include "mem.h" /* MEM_STATIC */
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "../zstd.h" /* ZSTD_customMem */
|
||||
|
||||
#ifndef ZSTD_ALLOCATIONS_H
|
||||
#define ZSTD_ALLOCATIONS_H
|
||||
|
||||
/* custom memory allocation functions */
|
||||
|
||||
MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc)
|
||||
return customMem.customAlloc(customMem.opaque, size);
|
||||
return ZSTD_malloc(size);
|
||||
}
|
||||
|
||||
MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc) {
|
||||
/* calloc implemented as malloc+memset;
|
||||
* not as efficient as calloc, but next best guess for custom malloc */
|
||||
void* const ptr = customMem.customAlloc(customMem.opaque, size);
|
||||
ZSTD_memset(ptr, 0, size);
|
||||
return ptr;
|
||||
}
|
||||
return ZSTD_calloc(1, size);
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
|
||||
{
|
||||
if (ptr!=NULL) {
|
||||
if (customMem.customFree)
|
||||
customMem.customFree(customMem.opaque, ptr);
|
||||
else
|
||||
ZSTD_free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ZSTD_ALLOCATIONS_H */
|
||||
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_BITS_H
|
||||
#define ZSTD_BITS_H
|
||||
|
||||
#include "mem.h"
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
|
||||
30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7,
|
||||
26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_tzcnt_u32(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanForward(&r, val);
|
||||
return (unsigned)r;
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (unsigned)__builtin_ctz(val);
|
||||
# else
|
||||
return ZSTD_countTrailingZeros32_fallback(val);
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
|
||||
assert(val != 0);
|
||||
{
|
||||
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
|
||||
11, 14, 16, 18, 22, 25, 3, 30,
|
||||
8, 12, 20, 28, 15, 17, 24, 7,
|
||||
19, 27, 23, 6, 26, 5, 4, 31};
|
||||
val |= val >> 1;
|
||||
val |= val >> 2;
|
||||
val |= val >> 4;
|
||||
val |= val >> 8;
|
||||
val |= val >> 16;
|
||||
return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_lzcnt_u32(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanReverse(&r, val);
|
||||
return (unsigned)(31 - r);
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (unsigned)__builtin_clz(val);
|
||||
# else
|
||||
return ZSTD_countLeadingZeros32_fallback(val);
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_tzcnt_u64(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanForward64(&r, val);
|
||||
return (unsigned)r;
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
|
||||
return (unsigned)__builtin_ctzll(val);
|
||||
# else
|
||||
{
|
||||
U32 mostSignificantWord = (U32)(val >> 32);
|
||||
U32 leastSignificantWord = (U32)val;
|
||||
if (leastSignificantWord == 0) {
|
||||
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
|
||||
} else {
|
||||
return ZSTD_countTrailingZeros32(leastSignificantWord);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_lzcnt_u64(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanReverse64(&r, val);
|
||||
return (unsigned)(63 - r);
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (unsigned)(__builtin_clzll(val));
|
||||
# else
|
||||
{
|
||||
U32 mostSignificantWord = (U32)(val >> 32);
|
||||
U32 leastSignificantWord = (U32)val;
|
||||
if (mostSignificantWord == 0) {
|
||||
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
|
||||
} else {
|
||||
return ZSTD_countLeadingZeros32(mostSignificantWord);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
|
||||
{
|
||||
if (MEM_isLittleEndian()) {
|
||||
if (MEM_64bits()) {
|
||||
return ZSTD_countTrailingZeros64((U64)val) >> 3;
|
||||
} else {
|
||||
return ZSTD_countTrailingZeros32((U32)val) >> 3;
|
||||
}
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
return ZSTD_countLeadingZeros64((U64)val) >> 3;
|
||||
} else {
|
||||
return ZSTD_countLeadingZeros32((U32)val) >> 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
|
||||
{
|
||||
assert(val != 0);
|
||||
return 31 - ZSTD_countLeadingZeros32(val);
|
||||
}
|
||||
|
||||
/* ZSTD_rotateRight_*():
|
||||
* Rotates a bitfield to the right by "count" bits.
|
||||
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
|
||||
*/
|
||||
MEM_STATIC
|
||||
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
|
||||
assert(count < 64);
|
||||
count &= 0x3F; /* for fickle pattern recognition */
|
||||
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
|
||||
}
|
||||
|
||||
MEM_STATIC
|
||||
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
|
||||
assert(count < 32);
|
||||
count &= 0x1F; /* for fickle pattern recognition */
|
||||
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
|
||||
}
|
||||
|
||||
MEM_STATIC
|
||||
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
|
||||
assert(count < 16);
|
||||
count &= 0x0F; /* for fickle pattern recognition */
|
||||
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
|
||||
}
|
||||
|
||||
#endif /* ZSTD_BITS_H */
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* bitstream
|
||||
* Part of FSE library
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
@@ -32,14 +32,15 @@ extern "C" {
|
||||
#include "compiler.h" /* UNLIKELY() */
|
||||
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
||||
#include "error_private.h" /* error codes and messages */
|
||||
#include "bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
|
||||
/*=========================================
|
||||
* Target specific
|
||||
=========================================*/
|
||||
#ifndef ZSTD_NO_INTRINSICS
|
||||
# if defined(__BMI__) && defined(__GNUC__)
|
||||
# include <immintrin.h> /* support for bextr (experimental) */
|
||||
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
|
||||
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
|
||||
# elif defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
# endif
|
||||
@@ -134,42 +135,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
|
||||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
|
||||
/* faster, but works only if nbBits >= 1 */
|
||||
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Internal functions
|
||||
****************************************************************/
|
||||
MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
# if STATIC_BMI2 == 1
|
||||
return _lzcnt_u32(val) ^ 31;
|
||||
# else
|
||||
unsigned long r = 0;
|
||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
||||
return __builtin_clz (val) ^ 31;
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return 31 - __CLZ(val);
|
||||
# else /* Software version */
|
||||
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
|
||||
11, 14, 16, 18, 22, 25, 3, 30,
|
||||
8, 12, 20, 28, 15, 17, 24, 7,
|
||||
19, 27, 23, 6, 26, 5, 4, 31 };
|
||||
U32 v = val;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
|
||||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
/*===== Local Constants =====*/
|
||||
static const unsigned BIT_mask[] = {
|
||||
0, 1, 3, 7, 0xF, 0x1F,
|
||||
@@ -199,6 +164,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
||||
return 0;
|
||||
}
|
||||
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
{
|
||||
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
|
||||
return _bzhi_u64(bitContainer, nbBits);
|
||||
#else
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
return bitContainer & BIT_mask[nbBits];
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! BIT_addBits() :
|
||||
* can add up to 31 bits into `bitC`.
|
||||
* Note : does not check for register overflow ! */
|
||||
@@ -208,7 +183,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
||||
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
||||
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
|
||||
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
|
||||
bitC->bitPos += nbBits;
|
||||
}
|
||||
|
||||
@@ -287,7 +262,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
||||
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
|
||||
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
||||
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
||||
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
|
||||
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
|
||||
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
|
||||
} else {
|
||||
bitD->ptr = bitD->start;
|
||||
@@ -295,27 +270,27 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
||||
switch(srcSize)
|
||||
{
|
||||
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
default: break;
|
||||
}
|
||||
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
||||
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
|
||||
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
||||
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
|
||||
}
|
||||
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
|
||||
@@ -334,16 +309,15 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
|
||||
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
||||
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
||||
}
|
||||
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
{
|
||||
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
|
||||
return _bzhi_u64(bitContainer, nbBits);
|
||||
/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
|
||||
* than accessing memory. When bmi2 instruction is not present, we consider
|
||||
* such cpus old (pre-Haswell, 2013) and their performance is not of that
|
||||
* importance.
|
||||
*/
|
||||
#if defined(__x86_64__) || defined(_M_X86)
|
||||
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
|
||||
#else
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
return bitContainer & BIT_mask[nbBits];
|
||||
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -393,7 +367,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
|
||||
}
|
||||
|
||||
/*! BIT_readBitsFast() :
|
||||
* unsafe version; only works only if nbBits >= 1 */
|
||||
* unsafe version; only works if nbBits >= 1 */
|
||||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
||||
@@ -424,7 +398,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
|
||||
* This function is safe, it guarantees it will not read beyond src buffer.
|
||||
* @return : status of `BIT_DStream_t` internal register.
|
||||
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
|
||||
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
||||
MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
||||
{
|
||||
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
|
||||
return BIT_DStream_overflow;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -11,6 +11,8 @@
|
||||
#ifndef ZSTD_COMPILER_H
|
||||
#define ZSTD_COMPILER_H
|
||||
|
||||
#include "portability_macros.h"
|
||||
|
||||
/*-*******************************************************
|
||||
* Compiler specifics
|
||||
*********************************************************/
|
||||
@@ -40,7 +42,7 @@
|
||||
|
||||
/**
|
||||
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
|
||||
This explictly marks such functions as __cdecl so that the code will still compile
|
||||
This explicitly marks such functions as __cdecl so that the code will still compile
|
||||
if a CC other than __cdecl has been made the default.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
@@ -92,29 +94,17 @@
|
||||
|
||||
|
||||
/* target attribute */
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
|
||||
#endif
|
||||
#if defined(__GNUC__) || defined(__ICCARM__)
|
||||
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
|
||||
#else
|
||||
# define TARGET_ATTRIBUTE(target)
|
||||
#endif
|
||||
|
||||
/* Enable runtime BMI2 dispatch based on the CPU.
|
||||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
||||
/* Target attribute for BMI2 dynamic dispatch.
|
||||
* Enable lzcnt, bmi, and bmi2.
|
||||
* We test for bmi1 & bmi2. lzcnt is included in bmi1.
|
||||
*/
|
||||
#ifndef DYNAMIC_BMI2
|
||||
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
||||
|| (defined(__GNUC__) \
|
||||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
||||
&& (defined(__x86_64__) || defined(_M_X86)) \
|
||||
&& !defined(__BMI2__)
|
||||
# define DYNAMIC_BMI2 1
|
||||
#else
|
||||
# define DYNAMIC_BMI2 0
|
||||
#endif
|
||||
#endif
|
||||
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
|
||||
|
||||
/* prefetch
|
||||
* can be disabled, by declaring NO_PREFETCH build macro */
|
||||
@@ -150,8 +140,9 @@
|
||||
}
|
||||
|
||||
/* vectorization
|
||||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
|
||||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && !defined(__LCC__) && defined(__GNUC__)
|
||||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
|
||||
* and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
|
||||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
|
||||
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
|
||||
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
||||
# else
|
||||
@@ -174,6 +165,12 @@
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
|
||||
# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
|
||||
#else
|
||||
# define ZSTD_UNREACHABLE { assert(0); }
|
||||
#endif
|
||||
|
||||
/* disable warnings */
|
||||
#ifdef _MSC_VER /* Visual Studio */
|
||||
# include <intrin.h> /* For Visual 2005 */
|
||||
@@ -190,6 +187,8 @@
|
||||
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
|
||||
# define STATIC_BMI2 1
|
||||
# endif
|
||||
# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
|
||||
# define STATIC_BMI2 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -197,26 +196,103 @@
|
||||
#define STATIC_BMI2 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_feature
|
||||
# define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
#ifndef ZSTD_MEMORY_SANITIZER
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# define ZSTD_MEMORY_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_MEMORY_SANITIZER 0
|
||||
/* compile time determination of SIMD support */
|
||||
#if !defined(ZSTD_NO_INTRINSICS)
|
||||
# if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
|
||||
# define ZSTD_ARCH_X86_SSE2
|
||||
# endif
|
||||
# if defined(__ARM_NEON) || defined(_M_ARM64)
|
||||
# define ZSTD_ARCH_ARM_NEON
|
||||
# endif
|
||||
#
|
||||
# if defined(ZSTD_ARCH_X86_SSE2)
|
||||
# include <emmintrin.h>
|
||||
# elif defined(ZSTD_ARCH_ARM_NEON)
|
||||
# include <arm_neon.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ZSTD_MEMORY_SANITIZER
|
||||
/* C-language Attributes are added in C23. */
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
|
||||
# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
|
||||
#else
|
||||
# define ZSTD_HAS_C_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
|
||||
/* Only use C++ attributes in C++. Some compilers report support for C++
|
||||
* attributes when compiling with C.
|
||||
*/
|
||||
#if defined(__cplusplus) && defined(__has_cpp_attribute)
|
||||
# define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
|
||||
#else
|
||||
# define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
|
||||
/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
|
||||
* - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
|
||||
* - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
|
||||
* - Else: __attribute__((__fallthrough__))
|
||||
*/
|
||||
#ifndef ZSTD_FALLTHROUGH
|
||||
# if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
|
||||
# define ZSTD_FALLTHROUGH [[fallthrough]]
|
||||
# elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
|
||||
# define ZSTD_FALLTHROUGH [[fallthrough]]
|
||||
# elif __has_attribute(__fallthrough__)
|
||||
/* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
|
||||
* gcc complains about: a label can only be part of a statement and a declaration is not a statement.
|
||||
*/
|
||||
# define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
|
||||
# else
|
||||
# define ZSTD_FALLTHROUGH
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*-**************************************************************
|
||||
* Alignment check
|
||||
*****************************************************************/
|
||||
|
||||
/* this test was initially positioned in mem.h,
|
||||
* but this file is removed (or replaced) for linux kernel
|
||||
* so it's now hosted in compiler.h,
|
||||
* which remains valid for both user & kernel spaces.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_ALIGNOF
|
||||
# if defined(__GNUC__) || defined(_MSC_VER)
|
||||
/* covers gcc, clang & MSVC */
|
||||
/* note : this section must come first, before C11,
|
||||
* due to a limitation in the kernel source generator */
|
||||
# define ZSTD_ALIGNOF(T) __alignof(T)
|
||||
|
||||
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
|
||||
/* C11 support */
|
||||
# include <stdalign.h>
|
||||
# define ZSTD_ALIGNOF(T) alignof(T)
|
||||
|
||||
# else
|
||||
/* No known support for alignof() - imperfect backup */
|
||||
# define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
|
||||
|
||||
# endif
|
||||
#endif /* ZSTD_ALIGNOF */
|
||||
|
||||
/*-**************************************************************
|
||||
* Sanitizer
|
||||
*****************************************************************/
|
||||
|
||||
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
|
||||
* abundance of caution, disable our custom poisoning on mingw. */
|
||||
#ifdef __MINGW32__
|
||||
#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
|
||||
#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
|
||||
#endif
|
||||
#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
|
||||
#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
@@ -235,20 +311,13 @@ void __msan_poison(const volatile void *a, size_t size);
|
||||
/* Returns the offset of the first (at least partially) poisoned byte in the
|
||||
memory range, or -1 if the whole range is good. */
|
||||
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
||||
|
||||
/* Print shadow and origin for the memory range to stderr in a human-readable
|
||||
format. */
|
||||
void __msan_print_shadow(const volatile void *x, size_t size);
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
#ifndef ZSTD_ADDRESS_SANITIZER
|
||||
# if __has_feature(address_sanitizer)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# elif defined(__SANITIZE_ADDRESS__)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_ADDRESS_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ZSTD_ADDRESS_SANITIZER
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* debug
|
||||
* Part of FSE library
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* debug
|
||||
* Part of FSE library
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* ******************************************************************
|
||||
* Common functions of New Generation Entropy library
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
@@ -19,8 +19,8 @@
|
||||
#include "error_private.h" /* ERR_*, ERROR */
|
||||
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
|
||||
#include "huf.h"
|
||||
#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */
|
||||
|
||||
|
||||
/*=== Version ===*/
|
||||
@@ -38,28 +38,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
|
||||
/*-**************************************************************
|
||||
* FSE NCount encoding-decoding
|
||||
****************************************************************/
|
||||
static U32 FSE_ctz(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
return _BitScanForward(&r, val) ? (unsigned)r : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return __builtin_ctz(val);
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return __CTZ(val);
|
||||
# else /* Software version */
|
||||
U32 count = 0;
|
||||
while ((val & 1) == 0) {
|
||||
val >>= 1;
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
@@ -107,7 +85,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
|
||||
* repeat.
|
||||
* Avoid UB by setting the high bit to 1.
|
||||
*/
|
||||
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
|
||||
int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
|
||||
while (repeats >= 12) {
|
||||
charnum += 3 * 12;
|
||||
if (LIKELY(ip <= iend-7)) {
|
||||
@@ -118,7 +96,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
|
||||
repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
|
||||
}
|
||||
charnum += 3 * repeats;
|
||||
bitStream >>= 2 * repeats;
|
||||
@@ -183,7 +161,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
|
||||
* know that threshold > 1.
|
||||
*/
|
||||
if (remaining <= 1) break;
|
||||
nbBits = BIT_highbit32(remaining) + 1;
|
||||
nbBits = ZSTD_highbit32(remaining) + 1;
|
||||
threshold = 1 << (nbBits - 1);
|
||||
}
|
||||
if (charnum >= maxSV1) break;
|
||||
@@ -217,7 +195,7 @@ static size_t FSE_readNCount_body_default(
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
@@ -258,7 +236,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
|
||||
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
@@ -299,21 +277,21 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
|
||||
weightTotal = 0;
|
||||
{ U32 n; for (n=0; n<oSize; n++) {
|
||||
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
rankStats[huffWeight[n]]++;
|
||||
weightTotal += (1 << huffWeight[n]) >> 1;
|
||||
} }
|
||||
if (weightTotal == 0) return ERROR(corruption_detected);
|
||||
|
||||
/* get last non-null symbol weight (implied, total must be 2^n) */
|
||||
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
|
||||
{ U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
|
||||
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
*tableLogPtr = tableLog;
|
||||
/* determine last weight */
|
||||
{ U32 const total = 1 << tableLog;
|
||||
U32 const rest = total - weightTotal;
|
||||
U32 const verif = 1 << BIT_highbit32(rest);
|
||||
U32 const lastWeight = BIT_highbit32(rest) + 1;
|
||||
U32 const verif = 1 << ZSTD_highbit32(rest);
|
||||
U32 const lastWeight = ZSTD_highbit32(rest) + 1;
|
||||
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
|
||||
huffWeight[oSize] = (BYTE)lastWeight;
|
||||
rankStats[lastWeight]++;
|
||||
@@ -337,7 +315,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
@@ -350,13 +328,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize,
|
||||
int bmi2)
|
||||
int flags)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
if (flags & HUF_flags_bmi2) {
|
||||
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
(void)flags;
|
||||
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -27,9 +27,11 @@ const char* ERR_getErrorString(ERR_enum code)
|
||||
case PREFIX(version_unsupported): return "Version not supported";
|
||||
case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
|
||||
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
|
||||
case PREFIX(corruption_detected): return "Corrupted block detected";
|
||||
case PREFIX(corruption_detected): return "Data corruption detected";
|
||||
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
|
||||
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
|
||||
case PREFIX(parameter_unsupported): return "Unsupported parameter";
|
||||
case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
|
||||
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
|
||||
case PREFIX(init_missing): return "Context should be init first";
|
||||
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
|
||||
@@ -38,17 +40,22 @@ const char* ERR_getErrorString(ERR_enum code)
|
||||
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
|
||||
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
|
||||
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
|
||||
case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
|
||||
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
|
||||
case PREFIX(dictionary_wrong): return "Dictionary mismatch";
|
||||
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
|
||||
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
|
||||
case PREFIX(srcSize_wrong): return "Src size is incorrect";
|
||||
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
|
||||
case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
|
||||
case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
|
||||
/* following error codes are not stable and may be removed or changed in a future version */
|
||||
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
|
||||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
|
||||
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
|
||||
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
|
||||
case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
|
||||
case PREFIX(externalSequences_invalid): return "External sequences are not valid";
|
||||
case PREFIX(maxCode):
|
||||
default: return notErrorCode;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -22,6 +22,8 @@ extern "C" {
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include "../zstd_errors.h" /* enum list */
|
||||
#include "compiler.h"
|
||||
#include "debug.h"
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
@@ -73,6 +75,83 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
|
||||
return ERR_getErrorString(ERR_getErrorCode(code));
|
||||
}
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* This is a helper function to help force C99-correctness during compilation.
|
||||
* Under strict compilation modes, variadic macro arguments can't be empty.
|
||||
* However, variadic function arguments can be. Using a function therefore lets
|
||||
* us statically check that at least one (string) argument was passed,
|
||||
* independent of the compilation flags.
|
||||
*/
|
||||
static INLINE_KEYWORD UNUSED_ATTR
|
||||
void _force_has_format_string(const char *format, ...) {
|
||||
(void)format;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* We want to force this function invocation to be syntactically correct, but
|
||||
* we don't want to force runtime evaluation of its arguments.
|
||||
*/
|
||||
#define _FORCE_HAS_FORMAT_STRING(...) \
|
||||
if (0) { \
|
||||
_force_has_format_string(__VA_ARGS__); \
|
||||
}
|
||||
|
||||
#define ERR_QUOTE(str) #str
|
||||
|
||||
/**
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
* In order to do that (particularly, printing the conditional that failed),
|
||||
* this can't just wrap RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* FSE : Finite State Entropy codec
|
||||
* Public Prototypes declaration
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
@@ -53,34 +53,6 @@ extern "C" {
|
||||
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* FSE simple functions
|
||||
******************************************/
|
||||
/*! FSE_compress() :
|
||||
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
|
||||
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
|
||||
@return : size of compressed data (<= dstCapacity).
|
||||
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
|
||||
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
|
||||
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/*! FSE_decompress():
|
||||
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
|
||||
into already allocated destination buffer 'dst', of size 'dstCapacity'.
|
||||
@return : size of regenerated data (<= maxDstSize),
|
||||
or an error code, which can be tested using FSE_isError() .
|
||||
|
||||
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
|
||||
Why ? : making this distinction requires a header.
|
||||
Header management is intentionally delegated to the user layer, which can better manage special cases.
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
|
||||
const void* cSrc, size_t cSrcSize);
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
* Tool functions
|
||||
******************************************/
|
||||
@@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
|
||||
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
* FSE advanced functions
|
||||
******************************************/
|
||||
/*! FSE_compress2() :
|
||||
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
|
||||
Both parameters can be defined as '0' to mean : use default value
|
||||
@return : size of compressed data
|
||||
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
|
||||
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
|
||||
if FSE_isError(return), it's an error code.
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
* FSE detailed API
|
||||
******************************************/
|
||||
@@ -164,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
||||
/*! Constructor and Destructor of FSE_CTable.
|
||||
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
|
||||
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
|
||||
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
|
||||
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
|
||||
|
||||
/*! FSE_buildCTable():
|
||||
Builds `ct`, which must be already allocated, using FSE_createCTable().
|
||||
@@ -241,23 +197,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
|
||||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
||||
const void* rBuffer, size_t rBuffSize, int bmi2);
|
||||
|
||||
/*! Constructor and Destructor of FSE_DTable.
|
||||
Note that its size depends on 'tableLog' */
|
||||
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
|
||||
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
|
||||
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
|
||||
|
||||
/*! FSE_buildDTable():
|
||||
Builds 'dt', which must be already allocated, using FSE_createDTable().
|
||||
return : 0, or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
/*! FSE_decompress_usingDTable():
|
||||
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
|
||||
into `dst` which must be already allocated.
|
||||
@return : size of regenerated data (necessarily <= `dstCapacity`),
|
||||
or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
|
||||
|
||||
/*!
|
||||
Tutorial :
|
||||
@@ -320,24 +260,15 @@ If there is an error, the function will return an error code, which can be teste
|
||||
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
|
||||
/**< same as FSE_optimalTableLog(), which used `minus==2` */
|
||||
|
||||
/* FSE_compress_wksp() :
|
||||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
|
||||
*/
|
||||
#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
|
||||
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
|
||||
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
|
||||
|
||||
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
||||
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
|
||||
|
||||
/* FSE_buildCTable_wksp() :
|
||||
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
|
||||
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
|
||||
*/
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
|
||||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
@@ -346,19 +277,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
||||
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
|
||||
|
||||
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
|
||||
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
|
||||
|
||||
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
|
||||
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
|
||||
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
|
||||
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
|
||||
|
||||
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
|
||||
/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
|
||||
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
|
||||
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
|
||||
|
||||
typedef enum {
|
||||
FSE_repeat_none, /**< Cannot use the previous table */
|
||||
@@ -554,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
|
||||
|
||||
/* FSE_getMaxNbBits() :
|
||||
* Approximate maximum cost of a symbol, in bits.
|
||||
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
||||
* Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
||||
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
||||
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
|
||||
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* ******************************************************************
|
||||
* FSE : Finite State Entropy decoder
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "error_private.h"
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h"
|
||||
#include "bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
@@ -55,19 +56,6 @@
|
||||
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
|
||||
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
|
||||
|
||||
|
||||
/* Function templates */
|
||||
FSE_DTable* FSE_createDTable (unsigned tableLog)
|
||||
{
|
||||
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
||||
return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
|
||||
}
|
||||
|
||||
void FSE_freeDTable (FSE_DTable* dt)
|
||||
{
|
||||
ZSTD_free(dt);
|
||||
}
|
||||
|
||||
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
|
||||
@@ -127,10 +115,10 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
|
||||
}
|
||||
}
|
||||
/* Now we spread those positions across the table.
|
||||
* The benefit of doing it in two stages is that we avoid the the
|
||||
* The benefit of doing it in two stages is that we avoid the
|
||||
* variable size inner loop, which caused lots of branch misses.
|
||||
* Now we can run through all the positions without any branch misses.
|
||||
* We unroll the loop twice, since that is what emperically worked best.
|
||||
* We unroll the loop twice, since that is what empirically worked best.
|
||||
*/
|
||||
{
|
||||
size_t position = 0;
|
||||
@@ -166,7 +154,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
|
||||
for (u=0; u<tableSize; u++) {
|
||||
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
|
||||
U32 const nextState = symbolNext[symbol]++;
|
||||
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
|
||||
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
||||
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
||||
} }
|
||||
|
||||
@@ -184,49 +172,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
|
||||
/*-*******************************************************
|
||||
* Decompression (Byte symbols)
|
||||
*********************************************************/
|
||||
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
|
||||
{
|
||||
void* ptr = dt;
|
||||
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
||||
void* dPtr = dt + 1;
|
||||
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
|
||||
|
||||
DTableH->tableLog = 0;
|
||||
DTableH->fastMode = 0;
|
||||
|
||||
cell->newState = 0;
|
||||
cell->symbol = symbolValue;
|
||||
cell->nbBits = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
|
||||
{
|
||||
void* ptr = dt;
|
||||
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
||||
void* dPtr = dt + 1;
|
||||
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
|
||||
const unsigned tableSize = 1 << nbBits;
|
||||
const unsigned tableMask = tableSize - 1;
|
||||
const unsigned maxSV1 = tableMask+1;
|
||||
unsigned s;
|
||||
|
||||
/* Sanity checks */
|
||||
if (nbBits < 1) return ERROR(GENERIC); /* min size */
|
||||
|
||||
/* Build Decoding Table */
|
||||
DTableH->tableLog = (U16)nbBits;
|
||||
DTableH->fastMode = 1;
|
||||
for (s=0; s<maxSV1; s++) {
|
||||
dinfo[s].newState = 0;
|
||||
dinfo[s].symbol = (BYTE)s;
|
||||
dinfo[s].nbBits = (BYTE)nbBits;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
||||
void* dst, size_t maxDstSize,
|
||||
@@ -290,26 +235,6 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
const FSE_DTable* dt)
|
||||
{
|
||||
const void* ptr = dt;
|
||||
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
||||
const U32 fastMode = DTableH->fastMode;
|
||||
|
||||
/* select fast mode (static) */
|
||||
if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
|
||||
return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
|
||||
FSE_DTable dtable[1]; /* Dynamically sized */
|
||||
@@ -342,7 +267,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
|
||||
}
|
||||
|
||||
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
|
||||
assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
|
||||
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
||||
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
||||
|
||||
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
|
||||
@@ -365,7 +291,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
||||
}
|
||||
@@ -382,22 +308,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc,
|
||||
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
||||
}
|
||||
|
||||
|
||||
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
|
||||
U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
|
||||
return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
|
||||
}
|
||||
|
||||
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
/* Static analyzer seems unable to understand this table will be properly initialized later */
|
||||
U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
||||
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* FSE_COMMONDEFS_ONLY */
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* huff0 huffman codec,
|
||||
* part of Finite State Entropy library
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
@@ -21,106 +21,29 @@ extern "C" {
|
||||
|
||||
/* *** Dependencies *** */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
/* *** library symbols visibility *** */
|
||||
/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
|
||||
* HUF symbols remain "private" (internal symbols for library only).
|
||||
* Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
|
||||
#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
|
||||
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
|
||||
# define HUF_PUBLIC_API __declspec(dllexport)
|
||||
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
|
||||
# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
|
||||
#else
|
||||
# define HUF_PUBLIC_API
|
||||
#endif
|
||||
|
||||
|
||||
/* ========================== */
|
||||
/* *** simple functions *** */
|
||||
/* ========================== */
|
||||
|
||||
/** HUF_compress() :
|
||||
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
|
||||
* 'dst' buffer must be already allocated.
|
||||
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
|
||||
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
|
||||
* @return : size of compressed data (<= `dstCapacity`).
|
||||
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
|
||||
* if HUF_isError(return), compression failed (more details using HUF_getErrorName())
|
||||
*/
|
||||
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/** HUF_decompress() :
|
||||
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
|
||||
* into already allocated buffer 'dst', of minimum size 'dstSize'.
|
||||
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
|
||||
* Note : in contrast with FSE, HUF_decompress can regenerate
|
||||
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
|
||||
* because it knows size to regenerate (originalSize).
|
||||
* @return : size of regenerated data (== originalSize),
|
||||
* or an error code, which can be tested using HUF_isError()
|
||||
*/
|
||||
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
|
||||
const void* cSrc, size_t cSrcSize);
|
||||
|
||||
|
||||
/* *** Tool functions *** */
|
||||
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
|
||||
HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
|
||||
|
||||
/* Error Management */
|
||||
HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
|
||||
HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
|
||||
|
||||
|
||||
/* *** Advanced function *** */
|
||||
|
||||
/** HUF_compress2() :
|
||||
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
|
||||
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
|
||||
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */
|
||||
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
/** HUF_compress4X_wksp() :
|
||||
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
|
||||
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
|
||||
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
|
||||
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
|
||||
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize);
|
||||
|
||||
#endif /* HUF_H_298734234 */
|
||||
|
||||
/* ******************************************************************
|
||||
* WARNING !!
|
||||
* The following section contains advanced and experimental definitions
|
||||
* which shall never be used in the context of a dynamic library,
|
||||
* because they are not guaranteed to remain stable in the future.
|
||||
* Only consider them in association with static linking.
|
||||
* *****************************************************************/
|
||||
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
|
||||
#define HUF_H_HUF_STATIC_LINKING_ONLY
|
||||
|
||||
/* *** Dependencies *** */
|
||||
#include "mem.h" /* U32 */
|
||||
#include "mem.h" /* U32 */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
|
||||
|
||||
/* *** Tool functions *** */
|
||||
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
|
||||
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
|
||||
|
||||
/* Error Management */
|
||||
unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
|
||||
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
|
||||
|
||||
|
||||
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
|
||||
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
|
||||
|
||||
/* *** Constants *** */
|
||||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
|
||||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
|
||||
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
|
||||
#define HUF_SYMBOLVALUE_MAX 255
|
||||
|
||||
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
|
||||
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
|
||||
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
|
||||
# error "HUF_TABLELOG_MAX is too large !"
|
||||
#endif
|
||||
@@ -136,15 +59,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
|
||||
/* static allocation of HUF's Compression Table */
|
||||
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
|
||||
struct HUF_CElt_s {
|
||||
U16 val;
|
||||
BYTE nbBits;
|
||||
}; /* typedef'd to HUF_CElt */
|
||||
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
|
||||
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
|
||||
typedef size_t HUF_CElt; /* consider it an incomplete type */
|
||||
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
|
||||
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
|
||||
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
|
||||
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
|
||||
|
||||
/* static allocation of HUF's DTable */
|
||||
typedef U32 HUF_DTable;
|
||||
@@ -158,25 +77,49 @@ typedef U32 HUF_DTable;
|
||||
/* ****************************************
|
||||
* Advanced decompression functions
|
||||
******************************************/
|
||||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
|
||||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
|
||||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
|
||||
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
/**
|
||||
* Huffman flags bitset.
|
||||
* For all flags, 0 is the default value.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
|
||||
* Otherwise: Ignored.
|
||||
*/
|
||||
HUF_flags_bmi2 = (1 << 0),
|
||||
/**
|
||||
* If set: Test possible table depths to find the one that produces the smallest header + encoded size.
|
||||
* If unset: Use heuristic to find the table depth.
|
||||
*/
|
||||
HUF_flags_optimalDepth = (1 << 1),
|
||||
/**
|
||||
* If set: If the previous table can encode the input, always reuse the previous table.
|
||||
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
|
||||
*/
|
||||
HUF_flags_preferRepeat = (1 << 2),
|
||||
/**
|
||||
* If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
|
||||
* If unset: Always histogram the entire input.
|
||||
*/
|
||||
HUF_flags_suspectUncompressible = (1 << 3),
|
||||
/**
|
||||
* If set: Don't use assembly implementations
|
||||
* If unset: Allow using assembly implementations
|
||||
*/
|
||||
HUF_flags_disableAsm = (1 << 4),
|
||||
/**
|
||||
* If set: Don't use the fast decoding loop, always use the fallback decoding loop.
|
||||
* If unset: Use the fast decoding loop when possible.
|
||||
*/
|
||||
HUF_flags_disableFast = (1 << 5)
|
||||
} HUF_flags_e;
|
||||
|
||||
|
||||
/* ****************************************
|
||||
* HUF detailed API
|
||||
* ****************************************/
|
||||
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
|
||||
|
||||
/*! HUF_compress() does the following:
|
||||
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
|
||||
@@ -189,11 +132,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
* For example, it's possible to compress several blocks using the same 'CTable',
|
||||
* or to save and regenerate 'CTable' using external methods.
|
||||
*/
|
||||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
|
||||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
|
||||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
|
||||
unsigned HUF_minTableLog(unsigned symbolCardinality);
|
||||
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
|
||||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
|
||||
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
|
||||
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
|
||||
@@ -202,22 +146,24 @@ typedef enum {
|
||||
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
|
||||
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
|
||||
} HUF_repeat;
|
||||
|
||||
/** HUF_compress4X_repeat() :
|
||||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
* If preferRepeat then the old table will always be used if valid.
|
||||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
|
||||
|
||||
/** HUF_buildCTable_wksp() :
|
||||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
|
||||
*/
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
|
||||
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
|
||||
@@ -243,17 +189,16 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
|
||||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workspace, size_t wkspSize,
|
||||
int bmi2);
|
||||
int flags);
|
||||
|
||||
/** HUF_readCTable() :
|
||||
* Loading a CTable saved with HUF_writeCTable() */
|
||||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
|
||||
|
||||
/** HUF_getNbBits() :
|
||||
/** HUF_getNbBitsFromCTable() :
|
||||
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
|
||||
* Note 1 : is not inlined, as HUF_CElt definition is private
|
||||
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
|
||||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
|
||||
* Note 1 : is not inlined, as HUF_CElt definition is private */
|
||||
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
|
||||
|
||||
/*
|
||||
* HUF_decompress() does the following:
|
||||
@@ -282,80 +227,46 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
|
||||
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
|
||||
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
|
||||
|
||||
/* ====================== */
|
||||
/* single stream variants */
|
||||
/* ====================== */
|
||||
|
||||
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
|
||||
/** HUF_compress1X_repeat() :
|
||||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
* If preferRepeat then the old table will always be used if valid.
|
||||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
|
||||
|
||||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
/* BMI2 variants.
|
||||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
|
||||
*/
|
||||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#endif
|
||||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
|
||||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#endif
|
||||
|
||||
#endif /* HUF_STATIC_LINKING_ONLY */
|
||||
#endif /* HUF_H_298734234 */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -51,6 +51,8 @@ extern "C" {
|
||||
# include <stdint.h> /* intptr_t */
|
||||
# endif
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint8_t U8;
|
||||
typedef int8_t S8;
|
||||
typedef uint16_t U16;
|
||||
typedef int16_t S16;
|
||||
typedef uint32_t U32;
|
||||
@@ -63,6 +65,8 @@ extern "C" {
|
||||
# error "this implementation requires char to be exactly 8-bit type"
|
||||
#endif
|
||||
typedef unsigned char BYTE;
|
||||
typedef unsigned char U8;
|
||||
typedef signed char S8;
|
||||
#if USHRT_MAX != 65535
|
||||
# error "this implementation requires short to be exactly 16-bit type"
|
||||
#endif
|
||||
@@ -129,21 +133,15 @@ MEM_STATIC size_t MEM_swapST(size_t in);
|
||||
/*-**************************************************************
|
||||
* Memory I/O Implementation
|
||||
*****************************************************************/
|
||||
/* MEM_FORCE_MEMORY_ACCESS :
|
||||
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
|
||||
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
|
||||
* The below switch allow to select different access method for improved performance.
|
||||
* Method 0 (default) : use `memcpy()`. Safe and portable.
|
||||
* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
|
||||
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
|
||||
/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
|
||||
* Method 0 : always use `memcpy()`. Safe and portable.
|
||||
* Method 1 : Use compiler extension to set unaligned access.
|
||||
* Method 2 : direct access. This method is portable but violate C standard.
|
||||
* It can generate buggy code on targets depending on alignment.
|
||||
* In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
|
||||
* See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
|
||||
* Prefer these methods in priority order (0 > 1 > 2)
|
||||
* Default : method 1 if supported, else method 0
|
||||
*/
|
||||
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
||||
# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
|
||||
# ifdef __GNUC__
|
||||
# define MEM_FORCE_MEMORY_ACCESS 1
|
||||
# endif
|
||||
#endif
|
||||
@@ -153,8 +151,22 @@ MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
|
||||
|
||||
MEM_STATIC unsigned MEM_isLittleEndian(void)
|
||||
{
|
||||
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
return 1;
|
||||
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||
return 0;
|
||||
#elif defined(__clang__) && __LITTLE_ENDIAN__
|
||||
return 1;
|
||||
#elif defined(__clang__) && __BIG_ENDIAN__
|
||||
return 0;
|
||||
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
|
||||
return 1;
|
||||
#elif defined(__DMC__) && defined(_M_IX86)
|
||||
return 1;
|
||||
#else
|
||||
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
|
||||
return one.c[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
|
||||
@@ -172,30 +184,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
|
||||
|
||||
#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
|
||||
|
||||
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
|
||||
/* currently only defined for gcc and icc */
|
||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||
__pragma( pack(push, 1) )
|
||||
typedef struct { U16 v; } unalign16;
|
||||
typedef struct { U32 v; } unalign32;
|
||||
typedef struct { U64 v; } unalign64;
|
||||
typedef struct { size_t v; } unalignArch;
|
||||
__pragma( pack(pop) )
|
||||
#else
|
||||
typedef struct { U16 v; } __attribute__((packed)) unalign16;
|
||||
typedef struct { U32 v; } __attribute__((packed)) unalign32;
|
||||
typedef struct { U64 v; } __attribute__((packed)) unalign64;
|
||||
typedef struct { size_t v; } __attribute__((packed)) unalignArch;
|
||||
#endif
|
||||
typedef __attribute__((aligned(1))) U16 unalign16;
|
||||
typedef __attribute__((aligned(1))) U32 unalign32;
|
||||
typedef __attribute__((aligned(1))) U64 unalign64;
|
||||
typedef __attribute__((aligned(1))) size_t unalignArch;
|
||||
|
||||
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
|
||||
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
|
||||
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
|
||||
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
|
||||
MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; }
|
||||
MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; }
|
||||
MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; }
|
||||
MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; }
|
||||
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; }
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; }
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; }
|
||||
|
||||
#else
|
||||
|
||||
@@ -239,6 +240,14 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value)
|
||||
|
||||
#endif /* MEM_FORCE_MEMORY_ACCESS */
|
||||
|
||||
MEM_STATIC U32 MEM_swap32_fallback(U32 in)
|
||||
{
|
||||
return ((in << 24) & 0xff000000 ) |
|
||||
((in << 8) & 0x00ff0000 ) |
|
||||
((in >> 8) & 0x0000ff00 ) |
|
||||
((in >> 24) & 0x000000ff );
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_swap32(U32 in)
|
||||
{
|
||||
#if defined(_MSC_VER) /* Visual Studio */
|
||||
@@ -247,13 +256,22 @@ MEM_STATIC U32 MEM_swap32(U32 in)
|
||||
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
|
||||
return __builtin_bswap32(in);
|
||||
#else
|
||||
return ((in << 24) & 0xff000000 ) |
|
||||
((in << 8) & 0x00ff0000 ) |
|
||||
((in >> 8) & 0x0000ff00 ) |
|
||||
((in >> 24) & 0x000000ff );
|
||||
return MEM_swap32_fallback(in);
|
||||
#endif
|
||||
}
|
||||
|
||||
MEM_STATIC U64 MEM_swap64_fallback(U64 in)
|
||||
{
|
||||
return ((in << 56) & 0xff00000000000000ULL) |
|
||||
((in << 40) & 0x00ff000000000000ULL) |
|
||||
((in << 24) & 0x0000ff0000000000ULL) |
|
||||
((in << 8) & 0x000000ff00000000ULL) |
|
||||
((in >> 8) & 0x00000000ff000000ULL) |
|
||||
((in >> 24) & 0x0000000000ff0000ULL) |
|
||||
((in >> 40) & 0x000000000000ff00ULL) |
|
||||
((in >> 56) & 0x00000000000000ffULL);
|
||||
}
|
||||
|
||||
MEM_STATIC U64 MEM_swap64(U64 in)
|
||||
{
|
||||
#if defined(_MSC_VER) /* Visual Studio */
|
||||
@@ -262,14 +280,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
|
||||
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
|
||||
return __builtin_bswap64(in);
|
||||
#else
|
||||
return ((in << 56) & 0xff00000000000000ULL) |
|
||||
((in << 40) & 0x00ff000000000000ULL) |
|
||||
((in << 24) & 0x0000ff0000000000ULL) |
|
||||
((in << 8) & 0x000000ff00000000ULL) |
|
||||
((in >> 8) & 0x00000000ff000000ULL) |
|
||||
((in >> 24) & 0x0000000000ff0000ULL) |
|
||||
((in >> 40) & 0x000000000000ff00ULL) |
|
||||
((in >> 56) & 0x00000000000000ffULL);
|
||||
return MEM_swap64_fallback(in);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -10,9 +10,9 @@
|
||||
|
||||
|
||||
/* ====== Dependencies ======= */
|
||||
#include "../common/allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "debug.h" /* assert */
|
||||
#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
||||
#include "pool.h"
|
||||
|
||||
/* ====== Compiler specifics ====== */
|
||||
@@ -86,7 +86,7 @@ static void* POOL_thread(void* opaque) {
|
||||
{ POOL_job const job = ctx->queue[ctx->queueHead];
|
||||
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
|
||||
ctx->numThreadsBusy++;
|
||||
ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
|
||||
ctx->queueEmpty = (ctx->queueHead == ctx->queueTail);
|
||||
/* Unlock the mutex, signal a pusher, and run the job */
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
@@ -96,15 +96,14 @@ static void* POOL_thread(void* opaque) {
|
||||
/* If the intended queue size was 0, signal after finishing job */
|
||||
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
|
||||
ctx->numThreadsBusy--;
|
||||
if (ctx->queueSize == 1) {
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
}
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
}
|
||||
} /* for (;;) */
|
||||
assert(0); /* Unreachable */
|
||||
}
|
||||
|
||||
/* ZSTD_createThreadPool() : public access point */
|
||||
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
|
||||
return POOL_create (numThreads, 0);
|
||||
}
|
||||
@@ -114,7 +113,8 @@ POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
|
||||
}
|
||||
|
||||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
ZSTD_customMem customMem) {
|
||||
ZSTD_customMem customMem)
|
||||
{
|
||||
POOL_ctx* ctx;
|
||||
/* Check parameters */
|
||||
if (!numThreads) { return NULL; }
|
||||
@@ -126,7 +126,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
* empty and full queues.
|
||||
*/
|
||||
ctx->queueSize = queueSize + 1;
|
||||
ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
|
||||
ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem);
|
||||
ctx->queueHead = 0;
|
||||
ctx->queueTail = 0;
|
||||
ctx->numThreadsBusy = 0;
|
||||
@@ -140,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
}
|
||||
ctx->shutdown = 0;
|
||||
/* Allocate space for the thread handles */
|
||||
ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
||||
ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
||||
ctx->threadCapacity = 0;
|
||||
ctx->customMem = customMem;
|
||||
/* Check for errors */
|
||||
@@ -173,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) {
|
||||
/* Join all of the threads */
|
||||
{ size_t i;
|
||||
for (i = 0; i < ctx->threadCapacity; ++i) {
|
||||
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
|
||||
ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */
|
||||
} }
|
||||
}
|
||||
|
||||
@@ -188,11 +188,22 @@ void POOL_free(POOL_ctx *ctx) {
|
||||
ZSTD_customFree(ctx, ctx->customMem);
|
||||
}
|
||||
|
||||
/*! POOL_joinJobs() :
|
||||
* Waits for all queued jobs to finish executing.
|
||||
*/
|
||||
void POOL_joinJobs(POOL_ctx* ctx) {
|
||||
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
|
||||
while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) {
|
||||
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
}
|
||||
|
||||
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
|
||||
POOL_free (pool);
|
||||
}
|
||||
|
||||
size_t POOL_sizeof(POOL_ctx *ctx) {
|
||||
size_t POOL_sizeof(const POOL_ctx* ctx) {
|
||||
if (ctx==NULL) return 0; /* supports sizeof NULL */
|
||||
return sizeof(*ctx)
|
||||
+ ctx->queueSize * sizeof(POOL_job)
|
||||
@@ -209,7 +220,7 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
|
||||
return 0;
|
||||
}
|
||||
/* numThreads > threadCapacity */
|
||||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
|
||||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
|
||||
if (!threadPool) return 1;
|
||||
/* replace existing thread pool */
|
||||
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
|
||||
@@ -257,9 +268,12 @@ static int isQueueFull(POOL_ctx const* ctx) {
|
||||
}
|
||||
|
||||
|
||||
static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
|
||||
static void
|
||||
POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
|
||||
{
|
||||
POOL_job const job = {function, opaque};
|
||||
POOL_job job;
|
||||
job.function = function;
|
||||
job.opaque = opaque;
|
||||
assert(ctx != NULL);
|
||||
if (ctx->shutdown) return;
|
||||
|
||||
@@ -313,7 +327,9 @@ POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
|
||||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
|
||||
POOL_ctx*
|
||||
POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem)
|
||||
{
|
||||
(void)numThreads;
|
||||
(void)queueSize;
|
||||
(void)customMem;
|
||||
@@ -325,6 +341,11 @@ void POOL_free(POOL_ctx* ctx) {
|
||||
(void)ctx;
|
||||
}
|
||||
|
||||
void POOL_joinJobs(POOL_ctx* ctx){
|
||||
assert(!ctx || ctx == &g_poolCtx);
|
||||
(void)ctx;
|
||||
}
|
||||
|
||||
int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
|
||||
(void)ctx; (void)numThreads;
|
||||
return 0;
|
||||
@@ -341,7 +362,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t POOL_sizeof(POOL_ctx* ctx) {
|
||||
size_t POOL_sizeof(const POOL_ctx* ctx) {
|
||||
if (ctx==NULL) return 0; /* supports sizeof NULL */
|
||||
assert(ctx == &g_poolCtx);
|
||||
return sizeof(*ctx);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -38,6 +38,12 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
*/
|
||||
void POOL_free(POOL_ctx* ctx);
|
||||
|
||||
|
||||
/*! POOL_joinJobs() :
|
||||
* Waits for all queued jobs to finish executing.
|
||||
*/
|
||||
void POOL_joinJobs(POOL_ctx* ctx);
|
||||
|
||||
/*! POOL_resize() :
|
||||
* Expands or shrinks pool's number of threads.
|
||||
* This is more efficient than releasing + creating a new context,
|
||||
@@ -53,7 +59,7 @@ int POOL_resize(POOL_ctx* ctx, size_t numThreads);
|
||||
* @return threadpool memory usage
|
||||
* note : compatible with NULL (returns 0 in this case)
|
||||
*/
|
||||
size_t POOL_sizeof(POOL_ctx* ctx);
|
||||
size_t POOL_sizeof(const POOL_ctx* ctx);
|
||||
|
||||
/*! POOL_function :
|
||||
* The function type that can be added to a thread pool.
|
||||
@@ -70,7 +76,7 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
|
||||
|
||||
|
||||
/*! POOL_tryAdd() :
|
||||
* Add the job `function(opaque)` to thread pool _if_ a worker is available.
|
||||
* Add the job `function(opaque)` to thread pool _if_ a queue slot is available.
|
||||
* Returns immediately even if not (does not block).
|
||||
* @return : 1 if successful, 0 if not.
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_PORTABILITY_MACROS_H
|
||||
#define ZSTD_PORTABILITY_MACROS_H
|
||||
|
||||
/**
|
||||
* This header file contains macro definitions to support portability.
|
||||
* This header is shared between C and ASM code, so it MUST only
|
||||
* contain macro definitions. It MUST not contain any C code.
|
||||
*
|
||||
* This header ONLY defines macros to detect platforms/feature support.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_feature
|
||||
# define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
#ifndef ZSTD_MEMORY_SANITIZER
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# define ZSTD_MEMORY_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_MEMORY_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
#ifndef ZSTD_ADDRESS_SANITIZER
|
||||
# if __has_feature(address_sanitizer)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# elif defined(__SANITIZE_ADDRESS__)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_ADDRESS_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under dfsan */
|
||||
#ifndef ZSTD_DATAFLOW_SANITIZER
|
||||
# if __has_feature(dataflow_sanitizer)
|
||||
# define ZSTD_DATAFLOW_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_DATAFLOW_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Mark the internal assembly functions as hidden */
|
||||
#ifdef __ELF__
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
|
||||
#else
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func)
|
||||
#endif
|
||||
|
||||
/* Enable runtime BMI2 dispatch based on the CPU.
|
||||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
||||
*/
|
||||
#ifndef DYNAMIC_BMI2
|
||||
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
||||
|| (defined(__GNUC__) \
|
||||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
||||
&& (defined(__x86_64__) || defined(_M_X64)) \
|
||||
&& !defined(__BMI2__)
|
||||
# define DYNAMIC_BMI2 1
|
||||
#else
|
||||
# define DYNAMIC_BMI2 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Only enable assembly for GNUC compatible compilers,
|
||||
* because other platforms may not support GAS assembly syntax.
|
||||
*
|
||||
* Only enable assembly for Linux / MacOS, other platforms may
|
||||
* work, but they haven't been tested. This could likely be
|
||||
* extended to BSD systems.
|
||||
*
|
||||
* Disable assembly when MSAN is enabled, because MSAN requires
|
||||
* 100% of code to be instrumented to work.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
|
||||
# if ZSTD_MEMORY_SANITIZER
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
# elif ZSTD_DATAFLOW_SANITIZER
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
# else
|
||||
# define ZSTD_ASM_SUPPORTED 1
|
||||
# endif
|
||||
# else
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
# endif
|
||||
#else
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines whether we should enable assembly for x86-64
|
||||
* with BMI2.
|
||||
*
|
||||
* Enable if all of the following conditions hold:
|
||||
* - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
|
||||
* - Assembly is supported
|
||||
* - We are compiling for x86-64 and either:
|
||||
* - DYNAMIC_BMI2 is enabled
|
||||
* - BMI2 is supported at compile time
|
||||
*/
|
||||
#if !defined(ZSTD_DISABLE_ASM) && \
|
||||
ZSTD_ASM_SUPPORTED && \
|
||||
defined(__x86_64__) && \
|
||||
(DYNAMIC_BMI2 || defined(__BMI2__))
|
||||
# define ZSTD_ENABLE_ASM_X86_64_BMI2 1
|
||||
#else
|
||||
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For x86 ELF targets, add .note.gnu.property section for Intel CET in
|
||||
* assembly sources when CET is enabled.
|
||||
*
|
||||
* Additionally, any function that may be called indirectly must begin
|
||||
* with ZSTD_CET_ENDBRANCH.
|
||||
*/
|
||||
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
|
||||
&& defined(__has_include)
|
||||
# if __has_include(<cet.h>)
|
||||
# include <cet.h>
|
||||
# define ZSTD_CET_ENDBRANCH _CET_ENDBR
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_CET_ENDBRANCH
|
||||
# define ZSTD_CET_ENDBRANCH
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_PORTABILITY_MACROS_H */
|
||||
@@ -23,8 +23,7 @@ int g_ZSTD_threading_useless_symbol;
|
||||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
|
||||
|
||||
/**
|
||||
* Windows minimalist Pthread Wrapper, based on :
|
||||
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
|
||||
* Windows minimalist Pthread Wrapper
|
||||
*/
|
||||
|
||||
|
||||
@@ -35,37 +34,92 @@ int g_ZSTD_threading_useless_symbol;
|
||||
|
||||
/* === Implementation === */
|
||||
|
||||
typedef struct {
|
||||
void* (*start_routine)(void*);
|
||||
void* arg;
|
||||
int initialized;
|
||||
ZSTD_pthread_cond_t initialized_cond;
|
||||
ZSTD_pthread_mutex_t initialized_mutex;
|
||||
} ZSTD_thread_params_t;
|
||||
|
||||
static unsigned __stdcall worker(void *arg)
|
||||
{
|
||||
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
|
||||
thread->arg = thread->start_routine(thread->arg);
|
||||
void* (*start_routine)(void*);
|
||||
void* thread_arg;
|
||||
|
||||
/* Initialized thread_arg and start_routine and signal main thread that we don't need it
|
||||
* to wait any longer.
|
||||
*/
|
||||
{
|
||||
ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg;
|
||||
thread_arg = thread_param->arg;
|
||||
start_routine = thread_param->start_routine;
|
||||
|
||||
/* Signal main thread that we are running and do not depend on its memory anymore */
|
||||
ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex);
|
||||
thread_param->initialized = 1;
|
||||
ZSTD_pthread_cond_signal(&thread_param->initialized_cond);
|
||||
ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex);
|
||||
}
|
||||
|
||||
start_routine(thread_arg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
|
||||
void* (*start_routine) (void*), void* arg)
|
||||
{
|
||||
ZSTD_thread_params_t thread_param;
|
||||
(void)unused;
|
||||
thread->arg = arg;
|
||||
thread->start_routine = start_routine;
|
||||
thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
|
||||
|
||||
if (!thread->handle)
|
||||
thread_param.start_routine = start_routine;
|
||||
thread_param.arg = arg;
|
||||
thread_param.initialized = 0;
|
||||
*thread = NULL;
|
||||
|
||||
/* Setup thread initialization synchronization */
|
||||
if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
|
||||
/* Should never happen on Windows */
|
||||
return -1;
|
||||
}
|
||||
if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) {
|
||||
/* Should never happen on Windows */
|
||||
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Spawn thread */
|
||||
*thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
|
||||
if (!thread) {
|
||||
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
|
||||
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
|
||||
return errno;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Wait for thread to be initialized */
|
||||
ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex);
|
||||
while(!thread_param.initialized) {
|
||||
ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex);
|
||||
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
|
||||
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread)
|
||||
{
|
||||
DWORD result;
|
||||
|
||||
if (!thread.handle) return 0;
|
||||
if (!thread) return 0;
|
||||
|
||||
result = WaitForSingleObject(thread, INFINITE);
|
||||
CloseHandle(thread);
|
||||
|
||||
result = WaitForSingleObject(thread.handle, INFINITE);
|
||||
switch (result) {
|
||||
case WAIT_OBJECT_0:
|
||||
if (value_ptr) *value_ptr = thread.arg;
|
||||
return 0;
|
||||
case WAIT_ABANDONED:
|
||||
return EINVAL;
|
||||
|
||||
@@ -23,8 +23,7 @@ extern "C" {
|
||||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
|
||||
|
||||
/**
|
||||
* Windows minimalist Pthread Wrapper, based on :
|
||||
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
|
||||
* Windows minimalist Pthread Wrapper
|
||||
*/
|
||||
#ifdef WINVER
|
||||
# undef WINVER
|
||||
@@ -62,16 +61,12 @@ extern "C" {
|
||||
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
|
||||
|
||||
/* ZSTD_pthread_create() and ZSTD_pthread_join() */
|
||||
typedef struct {
|
||||
HANDLE handle;
|
||||
void* (*start_routine)(void*);
|
||||
void* arg;
|
||||
} ZSTD_pthread_t;
|
||||
typedef HANDLE ZSTD_pthread_t;
|
||||
|
||||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
|
||||
void* (*start_routine) (void*), void* arg);
|
||||
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread);
|
||||
|
||||
/**
|
||||
* add here more wrappers as required
|
||||
@@ -99,7 +94,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
||||
|
||||
#define ZSTD_pthread_t pthread_t
|
||||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
|
||||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
|
||||
#define ZSTD_pthread_join(a) pthread_join((a),NULL)
|
||||
|
||||
#else /* DEBUGLEVEL >= 1 */
|
||||
|
||||
@@ -124,7 +119,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
|
||||
|
||||
#define ZSTD_pthread_t pthread_t
|
||||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
|
||||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
|
||||
#define ZSTD_pthread_join(a) pthread_join((a),NULL)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* xxHash - Fast Hash algorithm
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - xxHash homepage: http://www.xxhash.com
|
||||
* - xxHash homepage: https://cyan4973.github.io/xxHash/
|
||||
* - xxHash source repository : https://github.com/Cyan4973/xxHash
|
||||
*
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
@@ -13,812 +13,12 @@
|
||||
*/
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Tuning parameters
|
||||
***************************************/
|
||||
/*!XXH_FORCE_MEMORY_ACCESS :
|
||||
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
|
||||
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
|
||||
* The below switch allow to select different access method for improved performance.
|
||||
* Method 0 (default) : use `memcpy()`. Safe and portable.
|
||||
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
|
||||
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
|
||||
* Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
|
||||
* It can generate buggy code on targets which do not support unaligned memory accesses.
|
||||
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
|
||||
* See http://stackoverflow.com/a/32095106/646947 for details.
|
||||
* Prefer these methods in priority order (0 > 1 > 2)
|
||||
|
||||
/*
|
||||
* xxhash.c instantiates functions defined in xxhash.h
|
||||
*/
|
||||
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
||||
# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
|
||||
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
|
||||
defined(__ICCARM__)
|
||||
# define XXH_FORCE_MEMORY_ACCESS 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*!XXH_ACCEPT_NULL_INPUT_POINTER :
|
||||
* If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
|
||||
* When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
|
||||
* By default, this option is disabled. To enable it, uncomment below define :
|
||||
*/
|
||||
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
|
||||
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||
#define XXH_IMPLEMENTATION /* access definitions */
|
||||
|
||||
/*!XXH_FORCE_NATIVE_FORMAT :
|
||||
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
|
||||
* Results are therefore identical for little-endian and big-endian CPU.
|
||||
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
||||
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
|
||||
* to improve speed for Big-endian CPU.
|
||||
* This option has no impact on Little_Endian CPU.
|
||||
*/
|
||||
#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
|
||||
# define XXH_FORCE_NATIVE_FORMAT 0
|
||||
#endif
|
||||
|
||||
/*!XXH_FORCE_ALIGN_CHECK :
|
||||
* This is a minor performance trick, only useful with lots of very small keys.
|
||||
* It means : check for aligned/unaligned input.
|
||||
* The check costs one initial branch per hash; set to 0 when the input data
|
||||
* is guaranteed to be aligned.
|
||||
*/
|
||||
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
|
||||
# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
|
||||
# define XXH_FORCE_ALIGN_CHECK 0
|
||||
# else
|
||||
# define XXH_FORCE_ALIGN_CHECK 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Includes & Memory related functions
|
||||
***************************************/
|
||||
/* Modify the local functions below should you wish to use some other memory routines */
|
||||
/* for ZSTD_malloc(), ZSTD_free() */
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h" /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
|
||||
static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
|
||||
static void XXH_free (void* p) { ZSTD_free(p); }
|
||||
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
|
||||
|
||||
#ifndef XXH_STATIC_LINKING_ONLY
|
||||
# define XXH_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "xxhash.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Compiler Specific Options
|
||||
***************************************/
|
||||
#include "compiler.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Basic Types
|
||||
***************************************/
|
||||
#include "mem.h" /* BYTE, U32, U64, size_t */
|
||||
|
||||
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
|
||||
|
||||
/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
|
||||
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
|
||||
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
|
||||
|
||||
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
|
||||
|
||||
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
|
||||
/* currently only defined for gcc and icc */
|
||||
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
|
||||
|
||||
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
|
||||
static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
|
||||
|
||||
#else
|
||||
|
||||
/* portable and safe solution. Generally efficient.
|
||||
* see : http://stackoverflow.com/a/32095106/646947
|
||||
*/
|
||||
|
||||
static U32 XXH_read32(const void* memPtr)
|
||||
{
|
||||
U32 val;
|
||||
ZSTD_memcpy(&val, memPtr, sizeof(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static U64 XXH_read64(const void* memPtr)
|
||||
{
|
||||
U64 val;
|
||||
ZSTD_memcpy(&val, memPtr, sizeof(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
|
||||
|
||||
|
||||
/* ****************************************
|
||||
* Compiler-specific Functions and Macros
|
||||
******************************************/
|
||||
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
|
||||
/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
|
||||
#if defined(_MSC_VER)
|
||||
# define XXH_rotl32(x,r) _rotl(x,r)
|
||||
# define XXH_rotl64(x,r) _rotl64(x,r)
|
||||
#else
|
||||
#if defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
# define XXH_rotl32(x,r) __ROR(x,(32 - r))
|
||||
#else
|
||||
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
#endif
|
||||
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) /* Visual Studio */
|
||||
# define XXH_swap32 _byteswap_ulong
|
||||
# define XXH_swap64 _byteswap_uint64
|
||||
#elif GCC_VERSION >= 403
|
||||
# define XXH_swap32 __builtin_bswap32
|
||||
# define XXH_swap64 __builtin_bswap64
|
||||
#else
|
||||
static U32 XXH_swap32 (U32 x)
|
||||
{
|
||||
return ((x << 24) & 0xff000000 ) |
|
||||
((x << 8) & 0x00ff0000 ) |
|
||||
((x >> 8) & 0x0000ff00 ) |
|
||||
((x >> 24) & 0x000000ff );
|
||||
}
|
||||
static U64 XXH_swap64 (U64 x)
|
||||
{
|
||||
return ((x << 56) & 0xff00000000000000ULL) |
|
||||
((x << 40) & 0x00ff000000000000ULL) |
|
||||
((x << 24) & 0x0000ff0000000000ULL) |
|
||||
((x << 8) & 0x000000ff00000000ULL) |
|
||||
((x >> 8) & 0x00000000ff000000ULL) |
|
||||
((x >> 24) & 0x0000000000ff0000ULL) |
|
||||
((x >> 40) & 0x000000000000ff00ULL) |
|
||||
((x >> 56) & 0x00000000000000ffULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Architecture Macros
|
||||
***************************************/
|
||||
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
|
||||
|
||||
/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
|
||||
#ifndef XXH_CPU_LITTLE_ENDIAN
|
||||
static const int g_one = 1;
|
||||
# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
|
||||
#endif
|
||||
|
||||
|
||||
/* ***************************
|
||||
* Memory reads
|
||||
*****************************/
|
||||
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
if (align==XXH_unaligned)
|
||||
return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
|
||||
else
|
||||
return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
|
||||
{
|
||||
return XXH_readLE32_align(ptr, endian, XXH_unaligned);
|
||||
}
|
||||
|
||||
static U32 XXH_readBE32(const void* ptr)
|
||||
{
|
||||
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
if (align==XXH_unaligned)
|
||||
return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
|
||||
else
|
||||
return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
|
||||
{
|
||||
return XXH_readLE64_align(ptr, endian, XXH_unaligned);
|
||||
}
|
||||
|
||||
static U64 XXH_readBE64(const void* ptr)
|
||||
{
|
||||
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
|
||||
}
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Macros
|
||||
***************************************/
|
||||
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const U32 PRIME32_1 = 2654435761U;
|
||||
static const U32 PRIME32_2 = 2246822519U;
|
||||
static const U32 PRIME32_3 = 3266489917U;
|
||||
static const U32 PRIME32_4 = 668265263U;
|
||||
static const U32 PRIME32_5 = 374761393U;
|
||||
|
||||
static const U64 PRIME64_1 = 11400714785074694791ULL;
|
||||
static const U64 PRIME64_2 = 14029467366897019727ULL;
|
||||
static const U64 PRIME64_3 = 1609587929392839161ULL;
|
||||
static const U64 PRIME64_4 = 9650029242287828579ULL;
|
||||
static const U64 PRIME64_5 = 2870177450012600261ULL;
|
||||
|
||||
XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
|
||||
|
||||
|
||||
/* **************************
|
||||
* Utils
|
||||
****************************/
|
||||
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
|
||||
{
|
||||
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
|
||||
{
|
||||
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
|
||||
}
|
||||
|
||||
|
||||
/* ***************************
|
||||
* Simple Hash Functions
|
||||
*****************************/
|
||||
|
||||
static U32 XXH32_round(U32 seed, U32 input)
|
||||
{
|
||||
seed += input * PRIME32_2;
|
||||
seed = XXH_rotl32(seed, 13);
|
||||
seed *= PRIME32_1;
|
||||
return seed;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* bEnd = p + len;
|
||||
U32 h32;
|
||||
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (p==NULL) {
|
||||
len=0;
|
||||
bEnd=p=(const BYTE*)(size_t)16;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (len>=16) {
|
||||
const BYTE* const limit = bEnd - 16;
|
||||
U32 v1 = seed + PRIME32_1 + PRIME32_2;
|
||||
U32 v2 = seed + PRIME32_2;
|
||||
U32 v3 = seed + 0;
|
||||
U32 v4 = seed - PRIME32_1;
|
||||
|
||||
do {
|
||||
v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
|
||||
v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
|
||||
v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
|
||||
v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
|
||||
} while (p<=limit);
|
||||
|
||||
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
|
||||
} else {
|
||||
h32 = seed + PRIME32_5;
|
||||
}
|
||||
|
||||
h32 += (U32) len;
|
||||
|
||||
while (p+4<=bEnd) {
|
||||
h32 += XXH_get32bits(p) * PRIME32_3;
|
||||
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h32 += (*p) * PRIME32_5;
|
||||
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
|
||||
p++;
|
||||
}
|
||||
|
||||
h32 ^= h32 >> 15;
|
||||
h32 *= PRIME32_2;
|
||||
h32 ^= h32 >> 13;
|
||||
h32 *= PRIME32_3;
|
||||
h32 ^= h32 >> 16;
|
||||
|
||||
return h32;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
|
||||
{
|
||||
#if 0
|
||||
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
||||
XXH32_CREATESTATE_STATIC(state);
|
||||
XXH32_reset(state, seed);
|
||||
XXH32_update(state, input, len);
|
||||
return XXH32_digest(state);
|
||||
#else
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if (XXH_FORCE_ALIGN_CHECK) {
|
||||
if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
|
||||
else
|
||||
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
|
||||
} }
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
|
||||
else
|
||||
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static U64 XXH64_round(U64 acc, U64 input)
|
||||
{
|
||||
acc += input * PRIME64_2;
|
||||
acc = XXH_rotl64(acc, 31);
|
||||
acc *= PRIME64_1;
|
||||
return acc;
|
||||
}
|
||||
|
||||
static U64 XXH64_mergeRound(U64 acc, U64 val)
|
||||
{
|
||||
val = XXH64_round(0, val);
|
||||
acc ^= val;
|
||||
acc = acc * PRIME64_1 + PRIME64_4;
|
||||
return acc;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* const bEnd = p + len;
|
||||
U64 h64;
|
||||
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (p==NULL) {
|
||||
len=0;
|
||||
bEnd=p=(const BYTE*)(size_t)32;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (len>=32) {
|
||||
const BYTE* const limit = bEnd - 32;
|
||||
U64 v1 = seed + PRIME64_1 + PRIME64_2;
|
||||
U64 v2 = seed + PRIME64_2;
|
||||
U64 v3 = seed + 0;
|
||||
U64 v4 = seed - PRIME64_1;
|
||||
|
||||
do {
|
||||
v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
|
||||
v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
|
||||
v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
|
||||
v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
|
||||
} while (p<=limit);
|
||||
|
||||
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
||||
h64 = XXH64_mergeRound(h64, v1);
|
||||
h64 = XXH64_mergeRound(h64, v2);
|
||||
h64 = XXH64_mergeRound(h64, v3);
|
||||
h64 = XXH64_mergeRound(h64, v4);
|
||||
|
||||
} else {
|
||||
h64 = seed + PRIME64_5;
|
||||
}
|
||||
|
||||
h64 += (U64) len;
|
||||
|
||||
while (p+8<=bEnd) {
|
||||
U64 const k1 = XXH64_round(0, XXH_get64bits(p));
|
||||
h64 ^= k1;
|
||||
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
|
||||
p+=8;
|
||||
}
|
||||
|
||||
if (p+4<=bEnd) {
|
||||
h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
|
||||
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h64 ^= (*p) * PRIME64_5;
|
||||
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
|
||||
p++;
|
||||
}
|
||||
|
||||
h64 ^= h64 >> 33;
|
||||
h64 *= PRIME64_2;
|
||||
h64 ^= h64 >> 29;
|
||||
h64 *= PRIME64_3;
|
||||
h64 ^= h64 >> 32;
|
||||
|
||||
return h64;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
|
||||
{
|
||||
#if 0
|
||||
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
||||
XXH64_CREATESTATE_STATIC(state);
|
||||
XXH64_reset(state, seed);
|
||||
XXH64_update(state, input, len);
|
||||
return XXH64_digest(state);
|
||||
#else
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if (XXH_FORCE_ALIGN_CHECK) {
|
||||
if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
|
||||
else
|
||||
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
|
||||
} }
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
|
||||
else
|
||||
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* **************************************************
|
||||
* Advanced Hash Functions
|
||||
****************************************************/
|
||||
|
||||
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
|
||||
{
|
||||
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
|
||||
}
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
|
||||
{
|
||||
XXH_free(statePtr);
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
|
||||
{
|
||||
return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
|
||||
}
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
|
||||
{
|
||||
XXH_free(statePtr);
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
|
||||
/*** Hash feed ***/
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
|
||||
{
|
||||
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
|
||||
ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
|
||||
state.v1 = seed + PRIME32_1 + PRIME32_2;
|
||||
state.v2 = seed + PRIME32_2;
|
||||
state.v3 = seed + 0;
|
||||
state.v4 = seed - PRIME32_1;
|
||||
ZSTD_memcpy(statePtr, &state, sizeof(state));
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
|
||||
{
|
||||
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
|
||||
ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
|
||||
state.v1 = seed + PRIME64_1 + PRIME64_2;
|
||||
state.v2 = seed + PRIME64_2;
|
||||
state.v3 = seed + 0;
|
||||
state.v4 = seed - PRIME64_1;
|
||||
ZSTD_memcpy(statePtr, &state, sizeof(state));
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* const bEnd = p + len;
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (input==NULL) return XXH_ERROR;
|
||||
#endif
|
||||
|
||||
state->total_len_32 += (unsigned)len;
|
||||
state->large_len |= (len>=16) | (state->total_len_32>=16);
|
||||
|
||||
if (state->memsize + len < 16) { /* fill in tmp buffer */
|
||||
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
|
||||
state->memsize += (unsigned)len;
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
if (state->memsize) { /* some data left from previous update */
|
||||
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
|
||||
{ const U32* p32 = state->mem32;
|
||||
state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
|
||||
state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
|
||||
state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
|
||||
state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
|
||||
}
|
||||
p += 16-state->memsize;
|
||||
state->memsize = 0;
|
||||
}
|
||||
|
||||
if (p <= bEnd-16) {
|
||||
const BYTE* const limit = bEnd - 16;
|
||||
U32 v1 = state->v1;
|
||||
U32 v2 = state->v2;
|
||||
U32 v3 = state->v3;
|
||||
U32 v4 = state->v4;
|
||||
|
||||
do {
|
||||
v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
|
||||
v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
|
||||
v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
|
||||
v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
|
||||
} while (p<=limit);
|
||||
|
||||
state->v1 = v1;
|
||||
state->v2 = v2;
|
||||
state->v3 = v3;
|
||||
state->v4 = v4;
|
||||
}
|
||||
|
||||
if (p < bEnd) {
|
||||
XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
|
||||
state->memsize = (unsigned)(bEnd-p);
|
||||
}
|
||||
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
|
||||
else
|
||||
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
|
||||
{
|
||||
const BYTE * p = (const BYTE*)state->mem32;
|
||||
const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
|
||||
U32 h32;
|
||||
|
||||
if (state->large_len) {
|
||||
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
|
||||
} else {
|
||||
h32 = state->v3 /* == seed */ + PRIME32_5;
|
||||
}
|
||||
|
||||
h32 += state->total_len_32;
|
||||
|
||||
while (p+4<=bEnd) {
|
||||
h32 += XXH_readLE32(p, endian) * PRIME32_3;
|
||||
h32 = XXH_rotl32(h32, 17) * PRIME32_4;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h32 += (*p) * PRIME32_5;
|
||||
h32 = XXH_rotl32(h32, 11) * PRIME32_1;
|
||||
p++;
|
||||
}
|
||||
|
||||
h32 ^= h32 >> 15;
|
||||
h32 *= PRIME32_2;
|
||||
h32 ^= h32 >> 13;
|
||||
h32 *= PRIME32_3;
|
||||
h32 ^= h32 >> 16;
|
||||
|
||||
return h32;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_digest_endian(state_in, XXH_littleEndian);
|
||||
else
|
||||
return XXH32_digest_endian(state_in, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* **** XXH64 **** */
|
||||
|
||||
FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* const bEnd = p + len;
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (input==NULL) return XXH_ERROR;
|
||||
#endif
|
||||
|
||||
state->total_len += len;
|
||||
|
||||
if (state->memsize + len < 32) { /* fill in tmp buffer */
|
||||
if (input != NULL) {
|
||||
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
|
||||
}
|
||||
state->memsize += (U32)len;
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
if (state->memsize) { /* tmp buffer is full */
|
||||
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
|
||||
state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
|
||||
state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
|
||||
state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
|
||||
state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
|
||||
p += 32-state->memsize;
|
||||
state->memsize = 0;
|
||||
}
|
||||
|
||||
if (p+32 <= bEnd) {
|
||||
const BYTE* const limit = bEnd - 32;
|
||||
U64 v1 = state->v1;
|
||||
U64 v2 = state->v2;
|
||||
U64 v3 = state->v3;
|
||||
U64 v4 = state->v4;
|
||||
|
||||
do {
|
||||
v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
|
||||
v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
|
||||
v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
|
||||
v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
|
||||
} while (p<=limit);
|
||||
|
||||
state->v1 = v1;
|
||||
state->v2 = v2;
|
||||
state->v3 = v3;
|
||||
state->v4 = v4;
|
||||
}
|
||||
|
||||
if (p < bEnd) {
|
||||
XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
|
||||
state->memsize = (unsigned)(bEnd-p);
|
||||
}
|
||||
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
|
||||
else
|
||||
return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
|
||||
{
|
||||
const BYTE * p = (const BYTE*)state->mem64;
|
||||
const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
|
||||
U64 h64;
|
||||
|
||||
if (state->total_len >= 32) {
|
||||
U64 const v1 = state->v1;
|
||||
U64 const v2 = state->v2;
|
||||
U64 const v3 = state->v3;
|
||||
U64 const v4 = state->v4;
|
||||
|
||||
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
||||
h64 = XXH64_mergeRound(h64, v1);
|
||||
h64 = XXH64_mergeRound(h64, v2);
|
||||
h64 = XXH64_mergeRound(h64, v3);
|
||||
h64 = XXH64_mergeRound(h64, v4);
|
||||
} else {
|
||||
h64 = state->v3 + PRIME64_5;
|
||||
}
|
||||
|
||||
h64 += (U64) state->total_len;
|
||||
|
||||
while (p+8<=bEnd) {
|
||||
U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
|
||||
h64 ^= k1;
|
||||
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
|
||||
p+=8;
|
||||
}
|
||||
|
||||
if (p+4<=bEnd) {
|
||||
h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
|
||||
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h64 ^= (*p) * PRIME64_5;
|
||||
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
|
||||
p++;
|
||||
}
|
||||
|
||||
h64 ^= h64 >> 33;
|
||||
h64 *= PRIME64_2;
|
||||
h64 ^= h64 >> 29;
|
||||
h64 *= PRIME64_3;
|
||||
h64 ^= h64 >> 32;
|
||||
|
||||
return h64;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_digest_endian(state_in, XXH_littleEndian);
|
||||
else
|
||||
return XXH64_digest_endian(state_in, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
/* **************************
|
||||
* Canonical representation
|
||||
****************************/
|
||||
|
||||
/*! Default XXH result types are basic unsigned 32 and 64 bits.
|
||||
* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
|
||||
* These functions allow transformation of hash result into and from its canonical format.
|
||||
* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
|
||||
*/
|
||||
|
||||
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
|
||||
{
|
||||
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
|
||||
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
|
||||
ZSTD_memcpy(dst, &hash, sizeof(*dst));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
|
||||
{
|
||||
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
|
||||
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
|
||||
ZSTD_memcpy(dst, &hash, sizeof(*dst));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
|
||||
{
|
||||
return XXH_readBE32(src);
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
|
||||
{
|
||||
return XXH_readBE64(src);
|
||||
}
|
||||
|
||||
+5573
-172
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -14,7 +14,6 @@
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
|
||||
#include "error_private.h"
|
||||
#include "zstd_internal.h"
|
||||
|
||||
@@ -47,37 +46,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
|
||||
/*! ZSTD_getErrorString() :
|
||||
* provides error code string from enum */
|
||||
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
|
||||
|
||||
|
||||
|
||||
/*=**************************************************************
|
||||
* Custom allocator
|
||||
****************************************************************/
|
||||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc)
|
||||
return customMem.customAlloc(customMem.opaque, size);
|
||||
return ZSTD_malloc(size);
|
||||
}
|
||||
|
||||
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc) {
|
||||
/* calloc implemented as malloc+memset;
|
||||
* not as efficient as calloc, but next best guess for custom malloc */
|
||||
void* const ptr = customMem.customAlloc(customMem.opaque, size);
|
||||
ZSTD_memset(ptr, 0, size);
|
||||
return ptr;
|
||||
}
|
||||
return ZSTD_calloc(1, size);
|
||||
}
|
||||
|
||||
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
|
||||
{
|
||||
if (ptr!=NULL) {
|
||||
if (customMem.customFree)
|
||||
customMem.customFree(customMem.opaque, ptr);
|
||||
else
|
||||
ZSTD_free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -19,10 +19,8 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "compiler.h"
|
||||
#include "cpu.h"
|
||||
#include "mem.h"
|
||||
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
||||
#include "error_private.h"
|
||||
@@ -30,7 +28,6 @@
|
||||
#include "../zstd.h"
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "huf.h"
|
||||
#ifndef XXH_STATIC_LINKING_ONLY
|
||||
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||
@@ -60,81 +57,7 @@ extern "C" {
|
||||
#undef MAX
|
||||
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* This is a helper function to help force C99-correctness during compilation.
|
||||
* Under strict compilation modes, variadic macro arguments can't be empty.
|
||||
* However, variadic function arguments can be. Using a function therefore lets
|
||||
* us statically check that at least one (string) argument was passed,
|
||||
* independent of the compilation flags.
|
||||
*/
|
||||
static INLINE_KEYWORD UNUSED_ATTR
|
||||
void _force_has_format_string(const char *format, ...) {
|
||||
(void)format;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* We want to force this function invocation to be syntactically correct, but
|
||||
* we don't want to force runtime evaluation of its arguments.
|
||||
*/
|
||||
#define _FORCE_HAS_FORMAT_STRING(...) \
|
||||
if (0) { \
|
||||
_force_has_format_string(__VA_ARGS__); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
* In order to do that (particularly, printing the conditional that failed),
|
||||
* this can't just wrap RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0);
|
||||
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@@ -143,7 +66,6 @@ void _force_has_format_string(const char *format, ...) {
|
||||
#define ZSTD_OPT_NUM (1<<12)
|
||||
|
||||
#define ZSTD_REP_NUM 3 /* number of repcodes */
|
||||
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
|
||||
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
|
||||
#define KB *(1 <<10)
|
||||
@@ -170,9 +92,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
|
||||
#define ZSTD_FRAMECHECKSUMSIZE 4
|
||||
|
||||
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
|
||||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
|
||||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
|
||||
#define MIN_LITERALS_FOR_4_STREAMS 6
|
||||
|
||||
#define HufLog 12
|
||||
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
|
||||
|
||||
#define LONGNBSEQ 0x7F00
|
||||
@@ -180,6 +102,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
||||
#define MINMATCH 3
|
||||
|
||||
#define Litbits 8
|
||||
#define LitHufLog 11
|
||||
#define MaxLit ((1<<Litbits) - 1)
|
||||
#define MaxML 52
|
||||
#define MaxLL 35
|
||||
@@ -190,12 +113,14 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
||||
#define LLFSELog 9
|
||||
#define OffFSELog 8
|
||||
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
|
||||
#define MaxMLBits 16
|
||||
#define MaxLLBits 16
|
||||
|
||||
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
|
||||
/* Each table cannot take more than #symbols * FSELog bits */
|
||||
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
|
||||
|
||||
static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
|
||||
static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
@@ -212,7 +137,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
|
||||
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
||||
|
||||
static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
|
||||
static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
@@ -247,19 +172,30 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
* Shared functions to include for inlining
|
||||
*********************************************/
|
||||
static void ZSTD_copy8(void* dst, const void* src) {
|
||||
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
|
||||
#else
|
||||
ZSTD_memcpy(dst, src, 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
||||
|
||||
/* Need to use memmove here since the literal buffer can now be located within
|
||||
the dst buffer. In circumstances where the op "catches up" to where the
|
||||
literal buffer is, there can be partial overlaps in this call on the final
|
||||
copy if the literal is being shifted by less than 16 bytes. */
|
||||
static void ZSTD_copy16(void* dst, const void* src) {
|
||||
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
||||
#elif defined(ZSTD_ARCH_X86_SSE2)
|
||||
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
|
||||
#elif defined(__clang__)
|
||||
ZSTD_memmove(dst, src, 16);
|
||||
#else
|
||||
ZSTD_memcpy(dst, src, 16);
|
||||
/* ZSTD_memmove is not inlined properly by gcc */
|
||||
BYTE copy16_buf[16];
|
||||
ZSTD_memcpy(copy16_buf, src, 16);
|
||||
ZSTD_memcpy(dst, copy16_buf, 16);
|
||||
#endif
|
||||
}
|
||||
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
|
||||
@@ -288,8 +224,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
BYTE* op = (BYTE*)dst;
|
||||
BYTE* const oend = op + length;
|
||||
|
||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
|
||||
|
||||
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
|
||||
/* Handle short offset copies. */
|
||||
do {
|
||||
@@ -303,12 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
* one COPY16() in the first call. Then, do two calls per loop since
|
||||
* at that point it is more likely to have a high trip count.
|
||||
*/
|
||||
#ifdef __aarch64__
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#else
|
||||
ZSTD_copy16(op, ip);
|
||||
if (16 >= length) return;
|
||||
op += 16;
|
||||
@@ -318,7 +246,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -352,9 +279,9 @@ typedef enum {
|
||||
* Private declarations
|
||||
*********************************************/
|
||||
typedef struct seqDef_s {
|
||||
U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
|
||||
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
|
||||
U16 litLength;
|
||||
U16 matchLength;
|
||||
U16 mlBase; /* mlBase == matchLength - MINMATCH */
|
||||
} seqDef;
|
||||
|
||||
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
|
||||
@@ -367,11 +294,11 @@ typedef enum {
|
||||
typedef struct {
|
||||
seqDef* sequencesStart;
|
||||
seqDef* sequences; /* ptr to end of sequences */
|
||||
BYTE* litStart;
|
||||
BYTE* lit; /* ptr to end of literals */
|
||||
BYTE* llCode;
|
||||
BYTE* mlCode;
|
||||
BYTE* ofCode;
|
||||
BYTE* litStart;
|
||||
BYTE* lit; /* ptr to end of literals */
|
||||
BYTE* llCode;
|
||||
BYTE* mlCode;
|
||||
BYTE* ofCode;
|
||||
size_t maxNbSeq;
|
||||
size_t maxNbLit;
|
||||
|
||||
@@ -379,8 +306,8 @@ typedef struct {
|
||||
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
|
||||
* the existing value of the litLength or matchLength by 0x10000.
|
||||
*/
|
||||
ZSTD_longLengthType_e longLengthType;
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
ZSTD_longLengthType_e longLengthType;
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
} seqStore_t;
|
||||
|
||||
typedef struct {
|
||||
@@ -396,13 +323,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
|
||||
{
|
||||
ZSTD_sequenceLength seqLen;
|
||||
seqLen.litLength = seq->litLength;
|
||||
seqLen.matchLength = seq->matchLength + MINMATCH;
|
||||
seqLen.matchLength = seq->mlBase + MINMATCH;
|
||||
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
|
||||
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
|
||||
seqLen.litLength += 0xFFFF;
|
||||
seqLen.litLength += 0x10000;
|
||||
}
|
||||
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
|
||||
seqLen.matchLength += 0xFFFF;
|
||||
seqLen.matchLength += 0x10000;
|
||||
}
|
||||
}
|
||||
return seqLen;
|
||||
@@ -415,46 +342,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
|
||||
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
|
||||
*/
|
||||
typedef struct {
|
||||
size_t nbBlocks;
|
||||
size_t compressedSize;
|
||||
unsigned long long decompressedBound;
|
||||
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
||||
|
||||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
/* custom memory allocation functions */
|
||||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
|
||||
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
|
||||
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
|
||||
|
||||
|
||||
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
# if STATIC_BMI2 == 1
|
||||
return _lzcnt_u32(val)^31;
|
||||
# else
|
||||
unsigned long r=0;
|
||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return __builtin_clz (val) ^ 31;
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return 31 - __CLZ(val);
|
||||
# else /* Software version */
|
||||
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
||||
U32 v = val;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
|
||||
# endif
|
||||
}
|
||||
}
|
||||
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
|
||||
/* ZSTD_invalidateRepCodes() :
|
||||
@@ -482,6 +376,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/**
|
||||
* @returns true iff the CPU supports dynamic BMI2 dispatch.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
|
||||
{
|
||||
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
|
||||
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
|
||||
}
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -17,8 +17,17 @@ extern "C" {
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* weak symbol support */
|
||||
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
|
||||
/* weak symbol support
|
||||
* For now, enable conservatively:
|
||||
* - Only GNUC
|
||||
* - Only ELF
|
||||
* - Only x86-64, i386 and aarch64
|
||||
* Also, explicitly disable on platforms known not to work so they aren't
|
||||
* forgotten in the future.
|
||||
*/
|
||||
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
|
||||
defined(__GNUC__) && defined(__ELF__) && \
|
||||
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \
|
||||
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
|
||||
!defined(__CYGWIN__) && !defined(_AIX)
|
||||
# define ZSTD_HAVE_WEAK_SYMBOLS 1
|
||||
|
||||
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_CLEVELS_H
|
||||
#define ZSTD_CLEVELS_H
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
|
||||
#include "../zstd.h"
|
||||
|
||||
/*-===== Pre-defined compression levels =====-*/
|
||||
|
||||
#define ZSTD_MAX_CLEVEL 22
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__unused__))
|
||||
#endif
|
||||
|
||||
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
|
||||
{ /* "default" - for any srcSize > 256 KB */
|
||||
/* W, C, H, S, L, TL, strat */
|
||||
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
|
||||
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
|
||||
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
|
||||
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
|
||||
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
|
||||
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
|
||||
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
|
||||
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
|
||||
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
|
||||
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
|
||||
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
|
||||
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
|
||||
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
|
||||
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
|
||||
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
|
||||
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
|
||||
},
|
||||
{ /* for srcSize <= 256 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
|
||||
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
|
||||
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
|
||||
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
|
||||
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
|
||||
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
|
||||
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
|
||||
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
|
||||
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
||||
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
|
||||
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
||||
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
||||
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
|
||||
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
{ /* for srcSize <= 128 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
|
||||
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
|
||||
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
|
||||
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
||||
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
|
||||
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
|
||||
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
|
||||
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
||||
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
|
||||
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
||||
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
||||
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
|
||||
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
{ /* for srcSize <= 16 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
|
||||
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
|
||||
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
|
||||
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
||||
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
|
||||
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
|
||||
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
|
||||
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
|
||||
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
|
||||
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
|
||||
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
|
||||
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
|
||||
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
|
||||
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
|
||||
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
|
||||
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif /* ZSTD_CLEVELS_H */
|
||||
@@ -1,6 +1,6 @@
|
||||
/* ******************************************************************
|
||||
* FSE : Finite State Entropy encoder
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
@@ -26,6 +26,7 @@
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#define ZSTD_DEPS_NEED_MATH64
|
||||
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
|
||||
#include "../common/bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
@@ -75,13 +76,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 const maxSV1 = maxSymbolValue+1;
|
||||
|
||||
U32* cumul = (U32*)workSpace;
|
||||
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
|
||||
U16* cumul = (U16*)workSpace; /* size = maxSV1 */
|
||||
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
|
||||
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
|
||||
assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
|
||||
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
/* CTable header */
|
||||
tableU16[-2] = (U16) tableLog;
|
||||
@@ -89,7 +91,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
assert(tableLog < 16); /* required for threshold strategy to work */
|
||||
|
||||
/* For explanations on how to distribute symbol values over the table :
|
||||
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
||||
* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
||||
|
||||
#ifdef __clang_analyzer__
|
||||
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
||||
@@ -98,20 +100,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
/* symbol start positions */
|
||||
{ U32 u;
|
||||
cumul[0] = 0;
|
||||
for (u=1; u <= maxSymbolValue+1; u++) {
|
||||
for (u=1; u <= maxSV1; u++) {
|
||||
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
||||
cumul[u] = cumul[u-1] + 1;
|
||||
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
||||
} else {
|
||||
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
|
||||
assert(normalizedCounter[u-1] >= 0);
|
||||
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
|
||||
assert(cumul[u] >= cumul[u-1]); /* no overflow */
|
||||
} }
|
||||
cumul[maxSymbolValue+1] = tableSize+1;
|
||||
cumul[maxSV1] = (U16)(tableSize+1);
|
||||
}
|
||||
|
||||
/* Spread symbols */
|
||||
{ U32 position = 0;
|
||||
if (highThreshold == tableSize - 1) {
|
||||
/* Case for no low prob count symbols. Lay down 8 bytes at a time
|
||||
* to reduce branch misses since we are operating on a small block
|
||||
*/
|
||||
BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
|
||||
{ U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
U32 s;
|
||||
for (s=0; s<maxSV1; ++s, sv += add) {
|
||||
int i;
|
||||
int const n = normalizedCounter[s];
|
||||
MEM_write64(spread + pos, sv);
|
||||
for (i = 8; i < n; i += 8) {
|
||||
MEM_write64(spread + pos + i, sv);
|
||||
}
|
||||
assert(n>=0);
|
||||
pos += (size_t)n;
|
||||
}
|
||||
}
|
||||
/* Spread symbols across the table. Lack of lowprob symbols means that
|
||||
* we don't need variable sized inner loop, so we can unroll the loop and
|
||||
* reduce branch misses.
|
||||
*/
|
||||
{ size_t position = 0;
|
||||
size_t s;
|
||||
size_t const unroll = 2; /* Experimentally determined optimal unroll */
|
||||
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
||||
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
||||
size_t u;
|
||||
for (u = 0; u < unroll; ++u) {
|
||||
size_t const uPosition = (position + (u * step)) & tableMask;
|
||||
tableSymbol[uPosition] = spread[s + u];
|
||||
}
|
||||
position = (position + (unroll * step)) & tableMask;
|
||||
}
|
||||
assert(position == 0); /* Must have initialized all positions */
|
||||
}
|
||||
} else {
|
||||
U32 position = 0;
|
||||
U32 symbol;
|
||||
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
||||
for (symbol=0; symbol<maxSV1; symbol++) {
|
||||
int nbOccurrences;
|
||||
int const freq = normalizedCounter[symbol];
|
||||
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
||||
@@ -120,7 +163,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
while (position > highThreshold)
|
||||
position = (position + step) & tableMask; /* Low proba area */
|
||||
} }
|
||||
|
||||
assert(position==0); /* Must have initialized all positions */
|
||||
}
|
||||
|
||||
@@ -144,16 +186,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
case -1:
|
||||
case 1:
|
||||
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
|
||||
symbolTT[s].deltaFindState = total - 1;
|
||||
assert(total <= INT_MAX);
|
||||
symbolTT[s].deltaFindState = (int)(total - 1);
|
||||
total ++;
|
||||
break;
|
||||
default :
|
||||
{
|
||||
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
|
||||
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
|
||||
assert(normalizedCounter[s] > 1);
|
||||
{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
|
||||
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
|
||||
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
|
||||
symbolTT[s].deltaFindState = total - normalizedCounter[s];
|
||||
total += normalizedCounter[s];
|
||||
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
|
||||
total += (unsigned)normalizedCounter[s];
|
||||
} } } }
|
||||
|
||||
#if 0 /* debug : symbol costs */
|
||||
@@ -164,32 +207,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
symbol, normalizedCounter[symbol],
|
||||
FSE_getMaxNbBits(symbolTT, symbol),
|
||||
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
|
||||
}
|
||||
}
|
||||
} }
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
|
||||
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifndef FSE_COMMONDEFS_ONLY
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* FSE NCount encoding
|
||||
****************************************************************/
|
||||
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
|
||||
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
|
||||
+ 4 /* bitCount initialized at 4 */
|
||||
+ 2 /* first two symbols may use one additional bit each */) / 8)
|
||||
+ 1 /* round up to whole nb bytes */
|
||||
+ 2 /* additional two bytes for bitstream flush */;
|
||||
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
||||
}
|
||||
|
||||
@@ -306,21 +343,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
||||
* FSE Compression Code
|
||||
****************************************************************/
|
||||
|
||||
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
size_t size;
|
||||
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
||||
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
||||
return (FSE_CTable*)ZSTD_malloc(size);
|
||||
}
|
||||
|
||||
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
|
||||
|
||||
/* provides the minimum logSize to safely represent a distribution */
|
||||
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
||||
{
|
||||
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
|
||||
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
|
||||
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
|
||||
U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
|
||||
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
|
||||
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
||||
return minBits;
|
||||
@@ -328,7 +355,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
||||
|
||||
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
|
||||
{
|
||||
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
|
||||
U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
|
||||
U32 tableLog = maxTableLog;
|
||||
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
|
||||
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
||||
@@ -496,40 +523,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
return tableLog;
|
||||
}
|
||||
|
||||
|
||||
/* fake FSE_CTable, for raw (uncompressed) input */
|
||||
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
|
||||
{
|
||||
const unsigned tableSize = 1 << nbBits;
|
||||
const unsigned tableMask = tableSize - 1;
|
||||
const unsigned maxSymbolValue = tableMask;
|
||||
void* const ptr = ct;
|
||||
U16* const tableU16 = ( (U16*) ptr) + 2;
|
||||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
unsigned s;
|
||||
|
||||
/* Sanity checks */
|
||||
if (nbBits < 1) return ERROR(GENERIC); /* min size */
|
||||
|
||||
/* header */
|
||||
tableU16[-2] = (U16) nbBits;
|
||||
tableU16[-1] = (U16) maxSymbolValue;
|
||||
|
||||
/* Build table */
|
||||
for (s=0; s<tableSize; s++)
|
||||
tableU16[s] = (U16)(tableSize + s);
|
||||
|
||||
/* Build Symbol Transformation Table */
|
||||
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
symbolTT[s].deltaNbBits = deltaNbBits;
|
||||
symbolTT[s].deltaFindState = s-1;
|
||||
} }
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fake FSE_CTable, for rle input (always same symbol) */
|
||||
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
|
||||
{
|
||||
@@ -628,82 +621,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
|
||||
|
||||
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
/* FSE_compress_wksp() :
|
||||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` size must be `(1<<tableLog)`.
|
||||
*/
|
||||
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* op = ostart;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
|
||||
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
|
||||
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
|
||||
FSE_CTable* CTable = (FSE_CTable*)workSpace;
|
||||
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
|
||||
void* scratchBuffer = (void*)(CTable + CTableSize);
|
||||
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
|
||||
|
||||
#ifdef __clang_analyzer__
|
||||
memset(norm, 0, sizeof(norm));
|
||||
#endif
|
||||
|
||||
/* init conditions */
|
||||
if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
|
||||
if (srcSize <= 1) return 0; /* Not compressible */
|
||||
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
||||
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
|
||||
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
||||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
||||
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
||||
}
|
||||
|
||||
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
|
||||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
|
||||
|
||||
/* Write table description header */
|
||||
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
|
||||
op += nc_err;
|
||||
}
|
||||
|
||||
/* Compress */
|
||||
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
|
||||
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
|
||||
if (cSize == 0) return 0; /* not enough space for compressed data */
|
||||
op += cSize;
|
||||
}
|
||||
|
||||
/* check compressibility */
|
||||
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
|
||||
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
||||
union {
|
||||
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
||||
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
|
||||
} workspace;
|
||||
} fseWkspMax_t;
|
||||
|
||||
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
fseWkspMax_t scratchBuffer;
|
||||
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
||||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
||||
}
|
||||
|
||||
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* FSE_COMMONDEFS_ONLY */
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* hist : Histogram functions
|
||||
* part of Finite State Entropy project
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* ******************************************************************
|
||||
* hist : Histogram functions
|
||||
* part of Finite State Entropy project
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -23,6 +23,7 @@
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
# include "zstdmt_compress.h"
|
||||
#endif
|
||||
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
@@ -63,7 +64,7 @@ typedef struct {
|
||||
} ZSTD_localDict;
|
||||
|
||||
typedef struct {
|
||||
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
|
||||
HUF_repeat repeatMode;
|
||||
} ZSTD_hufCTables_t;
|
||||
|
||||
@@ -117,19 +118,20 @@ typedef struct {
|
||||
/** ZSTD_buildBlockEntropyStats() :
|
||||
* Builds entropy for the block.
|
||||
* @return : 0 on success or error code */
|
||||
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* prevEntropy,
|
||||
ZSTD_entropyCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize);
|
||||
size_t ZSTD_buildBlockEntropyStats(
|
||||
const seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* prevEntropy,
|
||||
ZSTD_entropyCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize);
|
||||
|
||||
/*********************************
|
||||
* Compression internals structs *
|
||||
*********************************/
|
||||
|
||||
typedef struct {
|
||||
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
|
||||
U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
|
||||
U32 len; /* Raw length of match */
|
||||
} ZSTD_match_t;
|
||||
|
||||
@@ -148,6 +150,12 @@ typedef struct {
|
||||
size_t capacity; /* The capacity starting from `seq` pointer */
|
||||
} rawSeqStore_t;
|
||||
|
||||
typedef struct {
|
||||
U32 idx; /* Index in array of ZSTD_Sequence */
|
||||
U32 posInSequence; /* Position within sequence at idx */
|
||||
size_t posInSrc; /* Number of bytes given by sequences provided so far */
|
||||
} ZSTD_sequencePosition;
|
||||
|
||||
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
|
||||
|
||||
typedef struct {
|
||||
@@ -179,7 +187,7 @@ typedef struct {
|
||||
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
|
||||
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
|
||||
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
ZSTD_paramSwitch_e literalCompressionMode;
|
||||
} optState_t;
|
||||
|
||||
typedef struct {
|
||||
@@ -199,6 +207,8 @@ typedef struct {
|
||||
*/
|
||||
} ZSTD_window_t;
|
||||
|
||||
#define ZSTD_WINDOW_START_INDEX 2
|
||||
|
||||
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
||||
|
||||
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
|
||||
@@ -216,8 +226,10 @@ struct ZSTD_matchState_t {
|
||||
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
|
||||
|
||||
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
|
||||
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
|
||||
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
|
||||
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
|
||||
U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
|
||||
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
|
||||
|
||||
U32* hashTable;
|
||||
U32* hashTable3;
|
||||
@@ -232,6 +244,18 @@ struct ZSTD_matchState_t {
|
||||
const ZSTD_matchState_t* dictMatchState;
|
||||
ZSTD_compressionParameters cParams;
|
||||
const rawSeqStore_t* ldmSeqStore;
|
||||
|
||||
/* Controls prefetching in some dictMatchState matchfinders.
|
||||
* This behavior is controlled from the cctx ms.
|
||||
* This parameter has no effect in the cdict ms. */
|
||||
int prefetchCDictTables;
|
||||
|
||||
/* When == 0, lazy match finders insert every position.
|
||||
* When != 0, lazy match finders only insert positions they search.
|
||||
* This allows them to skip much faster over incompressible data,
|
||||
* at a small cost to compression ratio.
|
||||
*/
|
||||
int lazySkipping;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -264,7 +288,7 @@ typedef struct {
|
||||
} ldmState_t;
|
||||
|
||||
typedef struct {
|
||||
U32 enableLdm; /* 1 if enable long distance matching */
|
||||
ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
|
||||
U32 hashLog; /* Log size of hashTable */
|
||||
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
|
||||
U32 minMatchLength; /* Minimum match length */
|
||||
@@ -295,7 +319,7 @@ struct ZSTD_CCtx_params_s {
|
||||
* There is no guarantee that hint is close to actual source size */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
ZSTD_paramSwitch_e literalCompressionMode;
|
||||
|
||||
/* Multithreading: used to pass parameters to mtctx */
|
||||
int nbWorkers;
|
||||
@@ -318,16 +342,34 @@ struct ZSTD_CCtx_params_s {
|
||||
int validateSequences;
|
||||
|
||||
/* Block splitting */
|
||||
int splitBlocks;
|
||||
ZSTD_paramSwitch_e useBlockSplitter;
|
||||
|
||||
/* Param for deciding whether to use row-based matchfinder */
|
||||
ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
|
||||
ZSTD_paramSwitch_e useRowMatchFinder;
|
||||
|
||||
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
|
||||
int deterministicRefPrefix;
|
||||
|
||||
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
||||
ZSTD_customMem customMem;
|
||||
|
||||
/* Controls prefetching in some dictMatchState matchfinders */
|
||||
ZSTD_paramSwitch_e prefetchCDictTables;
|
||||
|
||||
/* Controls whether zstd will fall back to an internal matchfinder
|
||||
* if the external matchfinder returns an error code. */
|
||||
int enableMatchFinderFallback;
|
||||
|
||||
/* Indicates whether an external matchfinder has been referenced.
|
||||
* Users can't set this externally.
|
||||
* It is set internally in ZSTD_registerSequenceProducer(). */
|
||||
int useSequenceProducer;
|
||||
|
||||
/* Adjust the max block size*/
|
||||
size_t maxBlockSize;
|
||||
|
||||
/* Controls repcode search in external sequence parsing */
|
||||
ZSTD_paramSwitch_e searchForExternalRepcodes;
|
||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||
|
||||
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
||||
@@ -343,6 +385,30 @@ typedef enum {
|
||||
ZSTDb_buffered
|
||||
} ZSTD_buffered_policy_e;
|
||||
|
||||
/**
|
||||
* Struct that contains all elements of block splitter that should be allocated
|
||||
* in a wksp.
|
||||
*/
|
||||
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
|
||||
typedef struct {
|
||||
seqStore_t fullSeqStoreChunk;
|
||||
seqStore_t firstHalfSeqStore;
|
||||
seqStore_t secondHalfSeqStore;
|
||||
seqStore_t currSeqStore;
|
||||
seqStore_t nextSeqStore;
|
||||
|
||||
U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
|
||||
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
||||
} ZSTD_blockSplitCtx;
|
||||
|
||||
/* Context for block-level external matchfinder API */
|
||||
typedef struct {
|
||||
void* mState;
|
||||
ZSTD_sequenceProducer_F* mFinder;
|
||||
ZSTD_Sequence* seqBuffer;
|
||||
size_t seqBufferCapacity;
|
||||
} ZSTD_externalMatchCtx;
|
||||
|
||||
struct ZSTD_CCtx_s {
|
||||
ZSTD_compressionStage_e stage;
|
||||
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
||||
@@ -374,7 +440,7 @@ struct ZSTD_CCtx_s {
|
||||
ZSTD_blockState_t blockState;
|
||||
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
||||
|
||||
/* Wether we are streaming or not */
|
||||
/* Whether we are streaming or not */
|
||||
ZSTD_buffered_policy_e bufferedPolicy;
|
||||
|
||||
/* streaming */
|
||||
@@ -392,6 +458,7 @@ struct ZSTD_CCtx_s {
|
||||
|
||||
/* Stable in/out buffer verification */
|
||||
ZSTD_inBuffer expectedInBuffer;
|
||||
size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
|
||||
size_t expectedOutBufferSize;
|
||||
|
||||
/* Dictionary */
|
||||
@@ -408,9 +475,16 @@ struct ZSTD_CCtx_s {
|
||||
#if ZSTD_TRACE
|
||||
ZSTD_TraceCtx traceCtx;
|
||||
#endif
|
||||
|
||||
/* Workspace for block splitter */
|
||||
ZSTD_blockSplitCtx blockSplitCtx;
|
||||
|
||||
/* Workspace for external matchfinder */
|
||||
ZSTD_externalMatchCtx externalMatchCtx;
|
||||
};
|
||||
|
||||
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
||||
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
|
||||
|
||||
typedef enum {
|
||||
ZSTD_noDict = 0,
|
||||
@@ -432,7 +506,7 @@ typedef enum {
|
||||
* In this mode we take both the source size and the dictionary size
|
||||
* into account when selecting and adjusting the parameters.
|
||||
*/
|
||||
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
|
||||
ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
|
||||
* We don't know what these parameters are for. We default to the legacy
|
||||
* behavior of taking both the source size and the dict size into account
|
||||
* when selecting and adjusting parameters.
|
||||
@@ -442,7 +516,7 @@ typedef enum {
|
||||
typedef size_t (*ZSTD_blockCompressor) (
|
||||
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
|
||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
|
||||
|
||||
|
||||
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
|
||||
@@ -476,31 +550,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
|
||||
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
|
||||
}
|
||||
|
||||
typedef struct repcodes_s {
|
||||
U32 rep[3];
|
||||
} repcodes_t;
|
||||
|
||||
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
|
||||
{
|
||||
repcodes_t newReps;
|
||||
if (offset >= ZSTD_REP_NUM) { /* full offset */
|
||||
newReps.rep[2] = rep[1];
|
||||
newReps.rep[1] = rep[0];
|
||||
newReps.rep[0] = offset - ZSTD_REP_MOVE;
|
||||
} else { /* repcode */
|
||||
U32 const repCode = offset + ll0;
|
||||
if (repCode > 0) { /* note : if repCode==0, no change */
|
||||
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
||||
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
||||
newReps.rep[1] = rep[0];
|
||||
newReps.rep[0] = currentOffset;
|
||||
} else { /* repCode == 0 */
|
||||
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
||||
}
|
||||
}
|
||||
return newReps;
|
||||
}
|
||||
|
||||
/* ZSTD_cParam_withinBounds:
|
||||
* @return 1 if value is within cParam bounds,
|
||||
* 0 otherwise */
|
||||
@@ -516,9 +565,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
|
||||
/* ZSTD_noCompressBlock() :
|
||||
* Writes uncompressed block to dst buffer from given src.
|
||||
* Returns the size of the block */
|
||||
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
|
||||
MEM_STATIC size_t
|
||||
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
|
||||
{
|
||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
|
||||
DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
|
||||
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
|
||||
dstSize_tooSmall, "dst buf too small for uncompressed block");
|
||||
MEM_writeLE24(dst, cBlockHeader24);
|
||||
@@ -526,7 +577,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
|
||||
return ZSTD_blockHeaderSize + srcSize;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
|
||||
MEM_STATIC size_t
|
||||
ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
|
||||
{
|
||||
BYTE* const op = (BYTE*)dst;
|
||||
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
|
||||
@@ -545,21 +597,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
||||
{
|
||||
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
|
||||
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
|
||||
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
|
||||
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
|
||||
return (srcSize >> minlog) + 2;
|
||||
}
|
||||
|
||||
MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
|
||||
MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
|
||||
{
|
||||
switch (cctxParams->literalCompressionMode) {
|
||||
case ZSTD_lcm_huffman:
|
||||
case ZSTD_ps_enable:
|
||||
return 0;
|
||||
case ZSTD_lcm_uncompressed:
|
||||
case ZSTD_ps_disable:
|
||||
return 1;
|
||||
default:
|
||||
assert(0 /* impossible: pre-validated */);
|
||||
/* fall-through */
|
||||
case ZSTD_lcm_auto:
|
||||
ZSTD_FALLTHROUGH;
|
||||
case ZSTD_ps_auto:
|
||||
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
|
||||
}
|
||||
}
|
||||
@@ -569,7 +621,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam
|
||||
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
|
||||
* large copies.
|
||||
*/
|
||||
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
|
||||
static void
|
||||
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
|
||||
{
|
||||
assert(iend > ilimit_w);
|
||||
if (ip <= ilimit_w) {
|
||||
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
|
||||
@@ -579,14 +633,28 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
|
||||
while (ip < iend) *op++ = *ip++;
|
||||
}
|
||||
|
||||
|
||||
#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
|
||||
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
|
||||
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
|
||||
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
|
||||
#define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM)
|
||||
#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
|
||||
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
|
||||
#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
|
||||
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
|
||||
|
||||
/*! ZSTD_storeSeq() :
|
||||
* Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
|
||||
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
|
||||
* `mlBase` : matchLength - MINMATCH
|
||||
* Allowed to overread literals up to litLimit.
|
||||
* Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
|
||||
* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
|
||||
* @matchLength : must be >= MINMATCH
|
||||
* Allowed to over-read literals up to litLimit.
|
||||
*/
|
||||
HINT_INLINE UNUSED_ATTR
|
||||
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
|
||||
HINT_INLINE UNUSED_ATTR void
|
||||
ZSTD_storeSeq(seqStore_t* seqStorePtr,
|
||||
size_t litLength, const BYTE* literals, const BYTE* litLimit,
|
||||
U32 offBase,
|
||||
size_t matchLength)
|
||||
{
|
||||
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
|
||||
BYTE const* const litEnd = literals + litLength;
|
||||
@@ -594,8 +662,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
||||
static const BYTE* g_start = NULL;
|
||||
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
|
||||
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
|
||||
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
|
||||
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
|
||||
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
|
||||
pos, (U32)litLength, (U32)matchLength, (U32)offBase);
|
||||
}
|
||||
#endif
|
||||
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
|
||||
@@ -605,9 +673,9 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
||||
assert(literals + litLength <= litLimit);
|
||||
if (litEnd <= litLimit_w) {
|
||||
/* Common case we can use wildcopy.
|
||||
* First copy 16 bytes, because literals are likely short.
|
||||
*/
|
||||
assert(WILDCOPY_OVERLENGTH >= 16);
|
||||
* First copy 16 bytes, because literals are likely short.
|
||||
*/
|
||||
ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
|
||||
ZSTD_copy16(seqStorePtr->lit, literals);
|
||||
if (litLength > 16) {
|
||||
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
|
||||
@@ -626,96 +694,63 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
||||
seqStorePtr->sequences[0].litLength = (U16)litLength;
|
||||
|
||||
/* match offset */
|
||||
seqStorePtr->sequences[0].offset = offCode + 1;
|
||||
seqStorePtr->sequences[0].offBase = offBase;
|
||||
|
||||
/* match Length */
|
||||
if (mlBase>0xFFFF) {
|
||||
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
||||
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
|
||||
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
||||
assert(matchLength >= MINMATCH);
|
||||
{ size_t const mlBase = matchLength - MINMATCH;
|
||||
if (mlBase>0xFFFF) {
|
||||
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
||||
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
|
||||
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
||||
}
|
||||
seqStorePtr->sequences[0].mlBase = (U16)mlBase;
|
||||
}
|
||||
seqStorePtr->sequences[0].matchLength = (U16)mlBase;
|
||||
|
||||
seqStorePtr->sequences++;
|
||||
}
|
||||
|
||||
/* ZSTD_updateRep() :
|
||||
* updates in-place @rep (array of repeat offsets)
|
||||
* @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
|
||||
*/
|
||||
MEM_STATIC void
|
||||
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
|
||||
{
|
||||
if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */
|
||||
rep[2] = rep[1];
|
||||
rep[1] = rep[0];
|
||||
rep[0] = OFFBASE_TO_OFFSET(offBase);
|
||||
} else { /* repcode */
|
||||
U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
|
||||
if (repCode > 0) { /* note : if repCode==0, no change */
|
||||
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
||||
rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
||||
rep[1] = rep[0];
|
||||
rep[0] = currentOffset;
|
||||
} else { /* repCode == 0 */
|
||||
/* nothing to do */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct repcodes_s {
|
||||
U32 rep[3];
|
||||
} repcodes_t;
|
||||
|
||||
MEM_STATIC repcodes_t
|
||||
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
|
||||
{
|
||||
repcodes_t newReps;
|
||||
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
||||
ZSTD_updateRep(newReps.rep, offBase, ll0);
|
||||
return newReps;
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Match length counter
|
||||
***************************************/
|
||||
static unsigned ZSTD_NbCommonBytes (size_t val)
|
||||
{
|
||||
if (MEM_isLittleEndian()) {
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
# if STATIC_BMI2
|
||||
return _tzcnt_u64(val) >> 3;
|
||||
# else
|
||||
unsigned long r = 0;
|
||||
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (__builtin_ctzll((U64)val) >> 3);
|
||||
# else
|
||||
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
|
||||
0, 3, 1, 3, 1, 4, 2, 7,
|
||||
0, 2, 3, 6, 1, 5, 3, 5,
|
||||
1, 3, 4, 4, 2, 5, 6, 7,
|
||||
7, 0, 1, 2, 3, 3, 4, 6,
|
||||
2, 6, 5, 5, 3, 4, 5, 6,
|
||||
7, 1, 2, 4, 6, 4, 4, 5,
|
||||
7, 2, 6, 5, 7, 6, 7, 7 };
|
||||
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r=0;
|
||||
return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_ctz((U32)val) >> 3);
|
||||
# else
|
||||
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
|
||||
3, 2, 2, 1, 3, 2, 0, 1,
|
||||
3, 3, 1, 2, 2, 2, 2, 0,
|
||||
3, 1, 2, 0, 1, 0, 1, 1 };
|
||||
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
||||
# endif
|
||||
}
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
# if STATIC_BMI2
|
||||
return _lzcnt_u64(val) >> 3;
|
||||
# else
|
||||
unsigned long r = 0;
|
||||
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (__builtin_clzll(val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
||||
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
||||
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
||||
r += (!val);
|
||||
return r;
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r = 0;
|
||||
return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clz((U32)val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
||||
r += (!val);
|
||||
return r;
|
||||
# endif
|
||||
} }
|
||||
}
|
||||
|
||||
|
||||
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
|
||||
{
|
||||
const BYTE* const pStart = pIn;
|
||||
@@ -761,32 +796,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
|
||||
* Hashes
|
||||
***************************************/
|
||||
static const U32 prime3bytes = 506832829U;
|
||||
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
|
||||
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
|
||||
static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
|
||||
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
|
||||
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
|
||||
|
||||
static const U32 prime4bytes = 2654435761U;
|
||||
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
|
||||
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
|
||||
static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
|
||||
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
|
||||
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
|
||||
|
||||
static const U64 prime5bytes = 889523592379ULL;
|
||||
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
|
||||
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
|
||||
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
|
||||
|
||||
static const U64 prime6bytes = 227718039650203ULL;
|
||||
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
||||
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
|
||||
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
|
||||
|
||||
static const U64 prime7bytes = 58295818150454627ULL;
|
||||
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
|
||||
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
|
||||
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
|
||||
|
||||
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
||||
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
||||
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
|
||||
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
|
||||
|
||||
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
||||
{
|
||||
/* Although some of these hashes do support hBits up to 64, some do not.
|
||||
* To be on the safe side, always avoid hBits > 32. */
|
||||
assert(hBits <= 32);
|
||||
|
||||
switch(mls)
|
||||
{
|
||||
default:
|
||||
@@ -798,6 +844,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
|
||||
/* Although some of these hashes do support hBits up to 64, some do not.
|
||||
* To be on the safe side, always avoid hBits > 32. */
|
||||
assert(hBits <= 32);
|
||||
|
||||
switch(mls)
|
||||
{
|
||||
default:
|
||||
case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
|
||||
case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
|
||||
case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
|
||||
case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
|
||||
case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_ipow() :
|
||||
* Return base^exponent.
|
||||
*/
|
||||
@@ -884,9 +948,9 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
|
||||
|
||||
MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
|
||||
{
|
||||
return window.dictLimit == 1 &&
|
||||
window.lowLimit == 1 &&
|
||||
(window.nextSrc - window.base) == 1;
|
||||
return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
|
||||
window.lowLimit == ZSTD_WINDOW_START_INDEX &&
|
||||
(window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -937,7 +1001,9 @@ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
|
||||
{
|
||||
U32 const cycleSize = 1u << cycleLog;
|
||||
U32 const curr = (U32)((BYTE const*)src - window.base);
|
||||
U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
|
||||
U32 const minIndexToOverflowCorrect = cycleSize
|
||||
+ MAX(maxDist, cycleSize)
|
||||
+ ZSTD_WINDOW_START_INDEX;
|
||||
|
||||
/* Adjust the min index to backoff the overflow correction frequency,
|
||||
* so we don't waste too much CPU in overflow correction. If this
|
||||
@@ -1012,10 +1078,14 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
||||
U32 const cycleSize = 1u << cycleLog;
|
||||
U32 const cycleMask = cycleSize - 1;
|
||||
U32 const curr = (U32)((BYTE const*)src - window->base);
|
||||
U32 const currentCycle0 = curr & cycleMask;
|
||||
/* Exclude zero so that newCurrent - maxDist >= 1. */
|
||||
U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
|
||||
U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
|
||||
U32 const currentCycle = curr & cycleMask;
|
||||
/* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
|
||||
U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
|
||||
? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
|
||||
: 0;
|
||||
U32 const newCurrent = currentCycle
|
||||
+ currentCycleCorrection
|
||||
+ MAX(maxDist, cycleSize);
|
||||
U32 const correction = curr - newCurrent;
|
||||
/* maxDist must be a power of two so that:
|
||||
* (newCurrent & cycleMask) == (curr & cycleMask)
|
||||
@@ -1031,14 +1101,20 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
||||
|
||||
window->base += correction;
|
||||
window->dictBase += correction;
|
||||
if (window->lowLimit <= correction) window->lowLimit = 1;
|
||||
else window->lowLimit -= correction;
|
||||
if (window->dictLimit <= correction) window->dictLimit = 1;
|
||||
else window->dictLimit -= correction;
|
||||
if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
|
||||
window->lowLimit = ZSTD_WINDOW_START_INDEX;
|
||||
} else {
|
||||
window->lowLimit -= correction;
|
||||
}
|
||||
if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
|
||||
window->dictLimit = ZSTD_WINDOW_START_INDEX;
|
||||
} else {
|
||||
window->dictLimit -= correction;
|
||||
}
|
||||
|
||||
/* Ensure we can still reference the full window. */
|
||||
assert(newCurrent >= maxDist);
|
||||
assert(newCurrent - maxDist >= 1);
|
||||
assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
|
||||
/* Ensure that lowLimit and dictLimit didn't underflow. */
|
||||
assert(window->lowLimit <= newCurrent);
|
||||
assert(window->dictLimit <= newCurrent);
|
||||
@@ -1133,10 +1209,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
||||
assert(blockEndIdx >= loadedDictEnd);
|
||||
|
||||
if (blockEndIdx > loadedDictEnd + maxDist) {
|
||||
if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
|
||||
/* On reaching window size, dictionaries are invalidated.
|
||||
* For simplification, if window size is reached anywhere within next block,
|
||||
* the dictionary is invalidated for the full block.
|
||||
*
|
||||
* We also have to invalidate the dictionary if ZSTD_window_update() has detected
|
||||
* non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
|
||||
* loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
|
||||
* dictMatchState, so setting it to NULL is not a problem.
|
||||
*/
|
||||
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
|
||||
*loadedDictEndPtr = 0;
|
||||
@@ -1149,11 +1230,12 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
||||
|
||||
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
||||
ZSTD_memset(window, 0, sizeof(*window));
|
||||
window->base = (BYTE const*)"";
|
||||
window->dictBase = (BYTE const*)"";
|
||||
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
|
||||
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
|
||||
window->nextSrc = window->base + 1; /* see issue #1241 */
|
||||
window->base = (BYTE const*)" ";
|
||||
window->dictBase = (BYTE const*)" ";
|
||||
ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
|
||||
window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
|
||||
window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
|
||||
window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
|
||||
window->nbOverflowCorrections = 0;
|
||||
}
|
||||
|
||||
@@ -1206,15 +1288,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
||||
*/
|
||||
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
||||
{
|
||||
U32 const maxDistance = 1U << windowLog;
|
||||
U32 const lowestValid = ms->window.lowLimit;
|
||||
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
||||
U32 const maxDistance = 1U << windowLog;
|
||||
U32 const lowestValid = ms->window.lowLimit;
|
||||
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
||||
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
|
||||
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
|
||||
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
|
||||
*/
|
||||
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
||||
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
||||
return matchLowest;
|
||||
}
|
||||
|
||||
@@ -1267,6 +1349,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
||||
|
||||
#endif
|
||||
|
||||
/* Short Cache */
|
||||
|
||||
/* Normally, zstd matchfinders follow this flow:
|
||||
* 1. Compute hash at ip
|
||||
* 2. Load index from hashTable[hash]
|
||||
* 3. Check if *ip == *(base + index)
|
||||
* In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
|
||||
*
|
||||
* Short cache is an optimization which allows us to avoid step 3 most of the time
|
||||
* when the data doesn't actually match. With short cache, the flow becomes:
|
||||
* 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
|
||||
* 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
|
||||
* 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
|
||||
*
|
||||
* Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
|
||||
* dictMatchState matchfinders.
|
||||
*/
|
||||
#define ZSTD_SHORT_CACHE_TAG_BITS 8
|
||||
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
|
||||
|
||||
/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
|
||||
* Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
|
||||
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
|
||||
size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
|
||||
assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
|
||||
hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
|
||||
}
|
||||
|
||||
/* Helper function for short cache matchfinders.
|
||||
* Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
|
||||
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
|
||||
U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
|
||||
U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
|
||||
return tag1 == tag2;
|
||||
}
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
@@ -1364,4 +1482,51 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
|
||||
*/
|
||||
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
|
||||
|
||||
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
|
||||
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
|
||||
* Note that the block delimiter must include the last literals of the block.
|
||||
*/
|
||||
size_t
|
||||
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
||||
ZSTD_sequencePosition* seqPos,
|
||||
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
||||
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
|
||||
|
||||
/* Returns the number of bytes to move the current read position back by.
|
||||
* Only non-zero if we ended up splitting a sequence.
|
||||
* Otherwise, it may return a ZSTD error if something went wrong.
|
||||
*
|
||||
* This function will attempt to scan through blockSize bytes
|
||||
* represented by the sequences in @inSeqs,
|
||||
* storing any (partial) sequences.
|
||||
*
|
||||
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
|
||||
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
|
||||
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
|
||||
*/
|
||||
size_t
|
||||
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
||||
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
||||
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
|
||||
|
||||
|
||||
/* ===============================================================
|
||||
* Deprecated definitions that are still used internally to avoid
|
||||
* deprecation warnings. These functions are exactly equivalent to
|
||||
* their public variants, but avoid the deprecation warnings.
|
||||
* =============================================================== */
|
||||
|
||||
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
|
||||
|
||||
size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
|
||||
#endif /* ZSTD_COMPRESS_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -13,11 +13,36 @@
|
||||
***************************************/
|
||||
#include "zstd_compress_literals.h"
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
* Debug Traces
|
||||
****************************************************************/
|
||||
#if DEBUGLEVEL >= 2
|
||||
|
||||
static size_t showHexa(const void* src, size_t srcSize)
|
||||
{
|
||||
const BYTE* const ip = (const BYTE*)src;
|
||||
size_t u;
|
||||
for (u=0; u<srcSize; u++) {
|
||||
RAWLOG(5, " %02X", ip[u]); (void)ip;
|
||||
}
|
||||
RAWLOG(5, " \n");
|
||||
return srcSize;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
* Literals compression - special cases
|
||||
****************************************************************/
|
||||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||
|
||||
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
|
||||
|
||||
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
|
||||
|
||||
switch(flSize)
|
||||
@@ -36,16 +61,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
||||
}
|
||||
|
||||
ZSTD_memcpy(ostart + flSize, src, srcSize);
|
||||
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
||||
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
||||
return srcSize + flSize;
|
||||
}
|
||||
|
||||
static int allBytesIdentical(const void* src, size_t srcSize)
|
||||
{
|
||||
assert(srcSize >= 1);
|
||||
assert(src != NULL);
|
||||
{ const BYTE b = ((const BYTE*)src)[0];
|
||||
size_t p;
|
||||
for (p=1; p<srcSize; p++) {
|
||||
if (((const BYTE*)src)[p] != b) return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||
|
||||
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
|
||||
assert(dstCapacity >= 4); (void)dstCapacity;
|
||||
assert(allBytesIdentical(src, srcSize));
|
||||
|
||||
switch(flSize)
|
||||
{
|
||||
@@ -63,27 +102,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
|
||||
}
|
||||
|
||||
ostart[flSize] = *(const BYTE*)src;
|
||||
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
|
||||
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
|
||||
return flSize+1;
|
||||
}
|
||||
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2)
|
||||
/* ZSTD_minLiteralsToCompress() :
|
||||
* returns minimal amount of literals
|
||||
* for literal compression to even be attempted.
|
||||
* Minimum is made tighter as compression strategy increases.
|
||||
*/
|
||||
static size_t
|
||||
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
|
||||
{
|
||||
assert((int)strategy >= 0);
|
||||
assert((int)strategy <= 9);
|
||||
/* btultra2 : min 8 bytes;
|
||||
* then 2x larger for each successive compression strategy
|
||||
* max threshold 64 bytes */
|
||||
{ int const shift = MIN(9-(int)strategy, 3);
|
||||
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
|
||||
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
|
||||
return mintc;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_compressLiterals (
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy,
|
||||
int disableLiteralCompression,
|
||||
int suspectUncompressible,
|
||||
int bmi2)
|
||||
{
|
||||
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 singleStream = srcSize < 256;
|
||||
symbolEncodingType_e hType = set_compressed;
|
||||
size_t cLitSize;
|
||||
|
||||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
|
||||
disableLiteralCompression, (U32)srcSize);
|
||||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
|
||||
disableLiteralCompression, (U32)srcSize, dstCapacity);
|
||||
|
||||
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
@@ -91,40 +154,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
if (disableLiteralCompression)
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
|
||||
/* small ? don't even attempt compression (speed opt) */
|
||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
/* if too small, don't even attempt compression (speed opt) */
|
||||
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
|
||||
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
|
||||
{ HUF_repeat repeat = prevHuf->repeatMode;
|
||||
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
||||
int const flags = 0
|
||||
| (bmi2 ? HUF_flags_bmi2 : 0)
|
||||
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
|
||||
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
|
||||
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
|
||||
|
||||
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
|
||||
huf_compress_f huf_compress;
|
||||
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
||||
cLitSize = singleStream ?
|
||||
HUF_compress1X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
||||
HUF_compress4X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
|
||||
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
|
||||
src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, LitHufLog,
|
||||
entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable,
|
||||
&repeat, flags);
|
||||
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
|
||||
if (repeat != HUF_repeat_none) {
|
||||
/* reused the existing table */
|
||||
DEBUGLOG(5, "Reusing previous huffman table");
|
||||
DEBUGLOG(5, "reusing statistics from previous huffman block");
|
||||
hType = set_repeat;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
} }
|
||||
if (cLitSize==1) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
/* A return value of 1 signals that the alphabet consists of a single symbol.
|
||||
* However, in some rare circumstances, it could be the compressed size (a single byte).
|
||||
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
|
||||
* (it's also necessary to not generate statistics).
|
||||
* Therefore, in such a case, actively check that all bytes are identical. */
|
||||
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||
} }
|
||||
|
||||
if (hType == set_compressed) {
|
||||
/* using a newly constructed table */
|
||||
@@ -135,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
switch(lhSize)
|
||||
{
|
||||
case 3: /* 2 - 2 - 10 - 10 */
|
||||
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
||||
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
||||
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
||||
MEM_writeLE24(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 4: /* 2 - 2 - 14 - 14 */
|
||||
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
||||
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 5: /* 2 - 2 - 18 - 18 */
|
||||
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
||||
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
ostart[4] = (BYTE)(cLitSize >> 10);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -16,14 +16,24 @@
|
||||
|
||||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
/* ZSTD_compressRleLiteralsBlock() :
|
||||
* Conditions :
|
||||
* - All bytes in @src are identical
|
||||
* - dstCapacity >= 4 */
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
/* ZSTD_compressLiterals():
|
||||
* @entropyWorkspace: must be aligned on 4-bytes boundaries
|
||||
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
|
||||
* @suspectUncompressible: sampling checks, to potentially skip huffman coding
|
||||
*/
|
||||
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2);
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
int suspectUncompressible,
|
||||
int bmi2);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -58,7 +58,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
|
||||
{
|
||||
/* Heuristic: This should cover most blocks <= 16K and
|
||||
* start to fade out after 16K to about 32K depending on
|
||||
* comprssibility.
|
||||
* compressibility.
|
||||
*/
|
||||
return nbSeq >= 2048;
|
||||
}
|
||||
@@ -166,7 +166,7 @@ ZSTD_selectEncodingType(
|
||||
if (mostFrequent == nbSeq) {
|
||||
*repeatMode = FSE_repeat_none;
|
||||
if (isDefaultAllowed && nbSeq <= 2) {
|
||||
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
||||
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
|
||||
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
||||
* If basic encoding isn't possible, always choose RLE.
|
||||
*/
|
||||
@@ -275,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
assert(nbSeq_1 > 1);
|
||||
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
||||
(void)entropyWorkspaceSize;
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
||||
assert(oend >= op);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
||||
return NCountSize;
|
||||
}
|
||||
}
|
||||
@@ -312,19 +313,19 @@ ZSTD_encodeSequences_body(
|
||||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
U32 const ofBits = ofCodeTable[nbSeq-1];
|
||||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
|
||||
BIT_flushBits(&blockStream);
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
|
||||
ofBits - extraBits);
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
|
||||
}
|
||||
BIT_flushBits(&blockStream);
|
||||
|
||||
@@ -338,8 +339,8 @@ ZSTD_encodeSequences_body(
|
||||
U32 const mlBits = ML_bits[mlCode];
|
||||
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
||||
(unsigned)sequences[n].litLength,
|
||||
(unsigned)sequences[n].matchLength + MINMATCH,
|
||||
(unsigned)sequences[n].offset);
|
||||
(unsigned)sequences[n].mlBase + MINMATCH,
|
||||
(unsigned)sequences[n].offBase);
|
||||
/* 32b*/ /* 64b*/
|
||||
/* (7)*/ /* (7)*/
|
||||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
||||
@@ -350,18 +351,18 @@ ZSTD_encodeSequences_body(
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
||||
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
||||
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
|
||||
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
||||
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
||||
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
|
||||
ofBits - extraBits); /* 31 */
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
||||
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
|
||||
}
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
||||
@@ -398,7 +399,7 @@ ZSTD_encodeSequences_default(
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
ZSTD_encodeSequences_bmi2(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -36,13 +36,14 @@
|
||||
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
|
||||
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
|
||||
* @return : compressed size of literals section of a sub-block
|
||||
* Or 0 if it unable to compress.
|
||||
* Or 0 if unable to compress.
|
||||
* Or error code */
|
||||
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const BYTE* literals, size_t litSize,
|
||||
void* dst, size_t dstSize,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
static size_t
|
||||
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const BYTE* literals, size_t litSize,
|
||||
void* dst, size_t dstSize,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
{
|
||||
size_t const header = writeEntropy ? 200 : 0;
|
||||
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
|
||||
@@ -53,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
|
||||
size_t cLitSize = 0;
|
||||
|
||||
(void)bmi2; /* TODO bmi2... */
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
|
||||
|
||||
*entropyWritten = 0;
|
||||
@@ -76,9 +75,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
||||
}
|
||||
|
||||
/* TODO bmi2 */
|
||||
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
|
||||
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
|
||||
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
|
||||
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
|
||||
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
|
||||
op += cSize;
|
||||
cLitSize += cSize;
|
||||
if (cSize == 0 || ERR_isError(cSize)) {
|
||||
@@ -126,12 +125,17 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
|
||||
static size_t
|
||||
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
size_t litSize, int lastSequence)
|
||||
{
|
||||
const seqDef* const sstart = sequences;
|
||||
const seqDef* const send = sequences + nbSeq;
|
||||
const seqDef* sp = sstart;
|
||||
size_t matchLengthSum = 0;
|
||||
size_t litLengthSum = 0;
|
||||
(void)(litLengthSum); /* suppress unused variable warning on some environments */
|
||||
while (send-sp > 0) {
|
||||
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
||||
litLengthSum += seqLen.litLength;
|
||||
@@ -155,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
||||
* @return : compressed size of sequences section of a sub-block
|
||||
* Or 0 if it is unable to compress
|
||||
* Or error code. */
|
||||
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
||||
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
static size_t
|
||||
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
||||
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
{
|
||||
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
@@ -324,7 +329,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
||||
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||
const BYTE* codeTable, unsigned maxCode,
|
||||
size_t nbSeq, const FSE_CTable* fseCTable,
|
||||
const U32* additionalBits,
|
||||
const U8* additionalBits,
|
||||
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
@@ -474,7 +479,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
/* I think there is an optimization opportunity here.
|
||||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
||||
* since it recalculates estimate from scratch.
|
||||
* For example, it would recount literal distribution and symbol codes everytime.
|
||||
* For example, it would recount literal distribution and symbol codes every time.
|
||||
*/
|
||||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
||||
&nextCBlock->entropy, entropyMetadata,
|
||||
@@ -538,7 +543,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
repcodes_t rep;
|
||||
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
||||
for (seq = sstart; seq < sp; ++seq) {
|
||||
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
}
|
||||
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -14,7 +14,9 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
||||
#include "../common/zstd_internal.h"
|
||||
#include "../common/portability_macros.h"
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
@@ -44,8 +46,9 @@ extern "C" {
|
||||
***************************************/
|
||||
typedef enum {
|
||||
ZSTD_cwksp_alloc_objects,
|
||||
ZSTD_cwksp_alloc_buffers,
|
||||
ZSTD_cwksp_alloc_aligned
|
||||
ZSTD_cwksp_alloc_aligned_init_once,
|
||||
ZSTD_cwksp_alloc_aligned,
|
||||
ZSTD_cwksp_alloc_buffers
|
||||
} ZSTD_cwksp_alloc_phase_e;
|
||||
|
||||
/**
|
||||
@@ -98,8 +101,8 @@ typedef enum {
|
||||
*
|
||||
* Workspace Layout:
|
||||
*
|
||||
* [ ... workspace ... ]
|
||||
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
|
||||
* [ ... workspace ... ]
|
||||
* [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
|
||||
*
|
||||
* The various objects that live in the workspace are divided into the
|
||||
* following categories, and are allocated separately:
|
||||
@@ -123,9 +126,18 @@ typedef enum {
|
||||
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
|
||||
* Their sizes depend on the cparams. These tables are 64-byte aligned.
|
||||
*
|
||||
* - Aligned: these buffers are used for various purposes that require 4 byte
|
||||
* alignment, but don't require any initialization before they're used. These
|
||||
* buffers are each aligned to 64 bytes.
|
||||
* - Init once: these buffers require to be initialized at least once before
|
||||
* use. They should be used when we want to skip memory initialization
|
||||
* while not triggering memory checkers (like Valgrind) when reading from
|
||||
* from this memory without writing to it first.
|
||||
* These buffers should be used carefully as they might contain data
|
||||
* from previous compressions.
|
||||
* Buffers are aligned to 64 bytes.
|
||||
*
|
||||
* - Aligned: these buffers don't require any initialization before they're
|
||||
* used. The user of the buffer should make sure they write into a buffer
|
||||
* location before reading from it.
|
||||
* Buffers are aligned to 64 bytes.
|
||||
*
|
||||
* - Buffers: these buffers are used for various purposes that don't require
|
||||
* any alignment or initialization before they're used. This means they can
|
||||
@@ -137,8 +149,9 @@ typedef enum {
|
||||
* correctly packed into the workspace buffer. That order is:
|
||||
*
|
||||
* 1. Objects
|
||||
* 2. Buffers
|
||||
* 3. Aligned/Tables
|
||||
* 2. Init once / Tables
|
||||
* 3. Aligned / Tables
|
||||
* 4. Buffers / Tables
|
||||
*
|
||||
* Attempts to reserve objects of different types out of order will fail.
|
||||
*/
|
||||
@@ -150,6 +163,7 @@ typedef struct {
|
||||
void* tableEnd;
|
||||
void* tableValidEnd;
|
||||
void* allocStart;
|
||||
void* initOnceStart;
|
||||
|
||||
BYTE allocFailed;
|
||||
int workspaceOversizedDuration;
|
||||
@@ -162,6 +176,7 @@ typedef struct {
|
||||
***************************************/
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
|
||||
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
|
||||
(void)ws;
|
||||
@@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
|
||||
assert(ws->tableEnd <= ws->allocStart);
|
||||
assert(ws->tableValidEnd <= ws->allocStart);
|
||||
assert(ws->allocStart <= ws->workspaceEnd);
|
||||
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
|
||||
assert(ws->workspace <= ws->initOnceStart);
|
||||
#if ZSTD_MEMORY_SANITIZER
|
||||
{
|
||||
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
|
||||
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
|
||||
#if defined(ZSTD_MSAN_PRINT)
|
||||
if(offset!=-1) {
|
||||
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
|
||||
}
|
||||
#endif
|
||||
assert(offset==-1);
|
||||
};
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
|
||||
* for internal purposes (currently only alignment).
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
|
||||
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
|
||||
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
|
||||
* to align the beginning of the aligned secion.
|
||||
*
|
||||
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
|
||||
* aligneds being sized in multiples of 64 bytes.
|
||||
/* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
|
||||
* bytes to align the beginning of tables section and end of buffers;
|
||||
*/
|
||||
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
|
||||
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
|
||||
return slackSpace;
|
||||
}
|
||||
|
||||
@@ -237,18 +262,28 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
|
||||
size_t const alignBytesMask = alignBytes - 1;
|
||||
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
|
||||
assert((alignBytes & alignBytesMask) == 0);
|
||||
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
assert(bytes < alignBytes);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the initial value for allocStart which is used to determine the position from
|
||||
* which we can allocate from the end of the workspace.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
|
||||
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal function. Do not use directly.
|
||||
* Reserves the given number of bytes within the aligned/buffer segment of the wksp, which
|
||||
* counts from the end of the wksp. (as opposed to the object/table segment)
|
||||
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
|
||||
* which counts from the end of the wksp (as opposed to the object/table segment).
|
||||
*
|
||||
* Returns a pointer to the beginning of that space.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
|
||||
MEM_STATIC void*
|
||||
ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
|
||||
{
|
||||
void* const alloc = (BYTE*)ws->allocStart - bytes;
|
||||
void* const bottom = ws->tableEnd;
|
||||
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
|
||||
@@ -260,6 +295,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t
|
||||
ws->allocFailed = 1;
|
||||
return NULL;
|
||||
}
|
||||
/* the area is reserved from the end of wksp.
|
||||
* If it overlaps with tableValidEnd, it voids guarantees on values' range */
|
||||
if (alloc < ws->tableValidEnd) {
|
||||
ws->tableValidEnd = alloc;
|
||||
}
|
||||
@@ -269,39 +306,32 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t
|
||||
|
||||
/**
|
||||
* Moves the cwksp to the next phase, and does any necessary allocations.
|
||||
* cwksp initialization must necessarily go through each phase in order.
|
||||
* Returns a 0 on success, or zstd error
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
|
||||
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
|
||||
MEM_STATIC size_t
|
||||
ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
|
||||
{
|
||||
assert(phase >= ws->phase);
|
||||
if (phase > ws->phase) {
|
||||
/* Going from allocating objects to allocating buffers */
|
||||
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
|
||||
phase >= ZSTD_cwksp_alloc_buffers) {
|
||||
/* Going from allocating objects to allocating initOnce / tables */
|
||||
if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
|
||||
phase >= ZSTD_cwksp_alloc_aligned_init_once) {
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
}
|
||||
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
|
||||
|
||||
/* Going from allocating buffers to allocating aligneds/tables */
|
||||
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
|
||||
phase >= ZSTD_cwksp_alloc_aligned) {
|
||||
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
|
||||
size_t const bytesToAlign =
|
||||
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
|
||||
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
|
||||
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
|
||||
memory_allocation, "aligned phase - alignment initial allocation failed!");
|
||||
}
|
||||
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
|
||||
void* const alloc = ws->objectEnd;
|
||||
void *const alloc = ws->objectEnd;
|
||||
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
void* const end = (BYTE*)alloc + bytesToAlign;
|
||||
void *const objectEnd = (BYTE *) alloc + bytesToAlign;
|
||||
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
|
||||
RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
|
||||
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
|
||||
"table phase - alignment initial allocation failed!");
|
||||
ws->objectEnd = end;
|
||||
ws->tableEnd = end;
|
||||
ws->tableValidEnd = end;
|
||||
ws->objectEnd = objectEnd;
|
||||
ws->tableEnd = objectEnd; /* table area starts being empty */
|
||||
if (ws->tableValidEnd < ws->tableEnd) {
|
||||
ws->tableValidEnd = ws->tableEnd;
|
||||
}
|
||||
}
|
||||
}
|
||||
ws->phase = phase;
|
||||
@@ -313,15 +343,17 @@ MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
|
||||
/**
|
||||
* Returns whether this object/buffer/etc was allocated in this workspace.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
|
||||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
|
||||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
|
||||
{
|
||||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal function. Do not use directly.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
|
||||
MEM_STATIC void*
|
||||
ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
|
||||
{
|
||||
void* alloc;
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
|
||||
return NULL;
|
||||
@@ -340,7 +372,9 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
if (alloc) {
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
/* We need to keep the redzone poisoned while unpoisoning the bytes that
|
||||
* are actually allocated. */
|
||||
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -351,14 +385,46 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
/**
|
||||
* Reserves and returns unaligned memory.
|
||||
*/
|
||||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
|
||||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
||||
* This memory has been initialized at least once in the past.
|
||||
* This doesn't mean it has been initialized this time, and it might contain data from previous
|
||||
* operations.
|
||||
* The main usage is for algorithms that might need read access into uninitialized memory.
|
||||
* The algorithm must maintain safety under these conditions and must make sure it doesn't
|
||||
* leak any of the past data (directly or in side channels).
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
|
||||
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
if(ptr && ptr < ws->initOnceStart) {
|
||||
/* We assume the memory following the current allocation is either:
|
||||
* 1. Not usable as initOnce memory (end of workspace)
|
||||
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
|
||||
* 3. An ASAN redzone, in which case we don't want to write on it
|
||||
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
|
||||
* Note that we assume here that MSAN and ASAN cannot run in the same time. */
|
||||
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
|
||||
ws->initOnceStart = ptr;
|
||||
}
|
||||
#if ZSTD_MEMORY_SANITIZER
|
||||
assert(__msan_test_shadow(ptr, bytes) == -1);
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
|
||||
ZSTD_cwksp_alloc_aligned);
|
||||
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
@@ -370,14 +436,19 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
|
||||
* their values remain constrained, allowing us to re-use them without
|
||||
* memset()-ing them.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
||||
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
|
||||
void* alloc;
|
||||
void* end;
|
||||
void* top;
|
||||
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
|
||||
return NULL;
|
||||
/* We can only start allocating tables after we are done reserving space for objects at the
|
||||
* start of the workspace */
|
||||
if(ws->phase < phase) {
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
alloc = ws->tableEnd;
|
||||
end = (BYTE *)alloc + bytes;
|
||||
@@ -408,9 +479,11 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
||||
|
||||
/**
|
||||
* Aligned on sizeof(void*).
|
||||
* Note : should happen only once, at workspace first initialization
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
|
||||
void* alloc = ws->objectEnd;
|
||||
void* end = (BYTE*)alloc + roundedBytes;
|
||||
|
||||
@@ -419,15 +492,15 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#endif
|
||||
|
||||
DEBUGLOG(5,
|
||||
DEBUGLOG(4,
|
||||
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
|
||||
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
|
||||
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
|
||||
assert((bytes & (sizeof(void*)-1)) == 0);
|
||||
assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
|
||||
assert(bytes % ZSTD_ALIGNOF(void*) == 0);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
/* we must be in the first phase, no advance is possible */
|
||||
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
|
||||
DEBUGLOG(4, "cwksp: object alloc failed!");
|
||||
DEBUGLOG(3, "cwksp: object alloc failed!");
|
||||
ws->allocFailed = 1;
|
||||
return NULL;
|
||||
}
|
||||
@@ -438,7 +511,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
|
||||
* either size. */
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
}
|
||||
@@ -447,17 +520,26 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
return alloc;
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
|
||||
{
|
||||
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
|
||||
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the table re-use logic is sound, and that we don't
|
||||
* access table space that we haven't cleaned, we re-"poison" the table
|
||||
* space every time we mark it dirty. */
|
||||
* space every time we mark it dirty.
|
||||
* Since tableValidEnd space and initOnce space may overlap we don't poison
|
||||
* the initOnce portion as it break its promise. This means that this poisoning
|
||||
* check isn't always applied fully. */
|
||||
{
|
||||
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
|
||||
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
|
||||
__msan_poison(ws->objectEnd, size);
|
||||
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
|
||||
__msan_poison(ws->objectEnd, size);
|
||||
} else {
|
||||
assert(ws->initOnceStart >= ws->objectEnd);
|
||||
__msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -485,7 +567,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
|
||||
assert(ws->tableValidEnd >= ws->objectEnd);
|
||||
assert(ws->tableValidEnd <= ws->allocStart);
|
||||
if (ws->tableValidEnd < ws->tableEnd) {
|
||||
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
|
||||
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
|
||||
}
|
||||
ZSTD_cwksp_mark_tables_clean(ws);
|
||||
}
|
||||
@@ -522,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the context re-use logic is sound, and that we don't
|
||||
* access stuff that this compression hasn't initialized, we re-"poison"
|
||||
* the workspace (or at least the non-static, non-table parts of it)
|
||||
* every time we start a new compression. */
|
||||
* the workspace except for the areas in which we expect memory re-use
|
||||
* without initialization (objects, valid tables area and init once
|
||||
* memory). */
|
||||
{
|
||||
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
|
||||
__msan_poison(ws->tableValidEnd, size);
|
||||
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
|
||||
size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
|
||||
__msan_poison(ws->tableValidEnd, size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -542,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
||||
#endif
|
||||
|
||||
ws->tableEnd = ws->objectEnd;
|
||||
ws->allocStart = ws->workspaceEnd;
|
||||
ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
|
||||
ws->allocFailed = 0;
|
||||
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
|
||||
ws->phase = ZSTD_cwksp_alloc_buffers;
|
||||
if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
|
||||
ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
|
||||
}
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
}
|
||||
@@ -562,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
|
||||
ws->workspaceEnd = (BYTE*)start + size;
|
||||
ws->objectEnd = ws->workspace;
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
|
||||
ws->phase = ZSTD_cwksp_alloc_objects;
|
||||
ws->isStatic = isStatic;
|
||||
ZSTD_cwksp_clear(ws);
|
||||
@@ -614,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
|
||||
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
|
||||
* actual amount of space used.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
|
||||
size_t const estimatedSpace, int resizedWorkspace) {
|
||||
if (resizedWorkspace) {
|
||||
/* Resized/newly allocated wksp should have exact bounds */
|
||||
return ZSTD_cwksp_used(ws) == estimatedSpace;
|
||||
} else {
|
||||
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
|
||||
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
|
||||
*/
|
||||
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
|
||||
}
|
||||
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
|
||||
/* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
|
||||
* the alignment bytes difference between estimation and actual usage */
|
||||
return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
|
||||
ZSTD_cwksp_used(ws) <= estimatedSpace;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -11,8 +11,43 @@
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "zstd_double_fast.h"
|
||||
|
||||
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashLarge = ms->hashTable;
|
||||
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
U32 const mls = cParams->minMatch;
|
||||
U32* const hashSmall = ms->chainTable;
|
||||
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* ip = base + ms->nextToUpdate;
|
||||
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
||||
const U32 fastHashFillStep = 3;
|
||||
|
||||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
/* Always insert every fastHashFillStep position into the hash tables.
|
||||
* Insert the other positions into the large hash table if their entry
|
||||
* is empty.
|
||||
*/
|
||||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 i;
|
||||
for (i = 0; i < fastHashFillStep; ++i) {
|
||||
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
||||
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
||||
if (i == 0) {
|
||||
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
|
||||
}
|
||||
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
|
||||
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
|
||||
}
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
if (dtlm == ZSTD_dtlm_fast)
|
||||
break;
|
||||
} }
|
||||
}
|
||||
|
||||
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
@@ -43,15 +78,26 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
if (dtlm == ZSTD_dtlm_fast)
|
||||
break;
|
||||
} }
|
||||
} }
|
||||
}
|
||||
|
||||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
const void* const end,
|
||||
ZSTD_dictTableLoadMethod_e dtlm,
|
||||
ZSTD_tableFillPurpose_e tfp)
|
||||
{
|
||||
if (tfp == ZSTD_tfp_forCDict) {
|
||||
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
|
||||
} else {
|
||||
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
|
||||
void const* src, size_t srcSize, U32 const mls /* template */)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32* const hashLong = ms->hashTable;
|
||||
@@ -60,7 +106,6 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
const U32 hBitsS = cParams->chainLog;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const BYTE* ip = istart;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
||||
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
||||
@@ -69,188 +114,161 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
||||
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const ZSTD_compressionParameters* const dictCParams =
|
||||
dictMode == ZSTD_dictMatchState ?
|
||||
&dms->cParams : NULL;
|
||||
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
|
||||
dms->hashTable : NULL;
|
||||
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
|
||||
dms->chainTable : NULL;
|
||||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
||||
dictBase + dictStartIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->hashLog : hBitsL;
|
||||
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->chainLog : hBitsS;
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
||||
size_t mLength;
|
||||
U32 offset;
|
||||
U32 curr;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
||||
/* how many positions to search before increasing step size */
|
||||
const size_t kStepIncr = 1 << kSearchStrength;
|
||||
/* the position at which to increment the step size if no match is found */
|
||||
const BYTE* nextStep;
|
||||
size_t step; /* the current step size */
|
||||
|
||||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
||||
size_t hl0; /* the long hash at ip */
|
||||
size_t hl1; /* the long hash at ip1 */
|
||||
|
||||
/* if a dictionary is attached, it must be within window range */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||
}
|
||||
U32 idxl0; /* the long match index for ip */
|
||||
U32 idxl1; /* the long match index for ip1 */
|
||||
|
||||
const BYTE* matchl0; /* the long match for ip */
|
||||
const BYTE* matchs0; /* the short match for ip */
|
||||
const BYTE* matchl1; /* the long match for ip1 */
|
||||
|
||||
const BYTE* ip = istart; /* the current position */
|
||||
const BYTE* ip1; /* the next position */
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
||||
U32 const maxRep = curr - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
ip += ((ip - prefixLowest) == 0);
|
||||
{
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
size_t mLength;
|
||||
U32 offset;
|
||||
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
||||
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
||||
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const matchIndexL = hashLong[h2];
|
||||
U32 matchIndexS = hashSmall[h];
|
||||
const BYTE* matchLong = base + matchIndexL;
|
||||
const BYTE* match = base + matchIndexS;
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
||||
/* Outer Loop: one iteration per match found and stored */
|
||||
while (1) {
|
||||
step = 1;
|
||||
nextStep = ip + kStepIncr;
|
||||
ip1 = ip + step;
|
||||
|
||||
/* check dictMatchState repcode */
|
||||
if (dictMode == ZSTD_dictMatchState
|
||||
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
goto _match_stored;
|
||||
if (ip1 > ilimit) {
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* check noDict repcode */
|
||||
if ( dictMode == ZSTD_noDict
|
||||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
goto _match_stored;
|
||||
}
|
||||
hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
|
||||
idxl0 = hashLong[hl0];
|
||||
matchl0 = base + idxl0;
|
||||
|
||||
if (matchIndexL > prefixLowestIndex) {
|
||||
/* check prefix long match */
|
||||
if (MEM_read64(matchLong) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
|
||||
offset = (U32)(ip-matchLong);
|
||||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
/* Inner Loop: one iteration per search / position */
|
||||
do {
|
||||
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
const U32 idxs0 = hashSmall[hs0];
|
||||
curr = (U32)(ip-base);
|
||||
matchs0 = base + idxs0;
|
||||
|
||||
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
|
||||
|
||||
/* check noDict repcode */
|
||||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
||||
goto _match_stored;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
/* check dictMatchState long match */
|
||||
U32 const dictMatchIndexL = dictHashLong[dictHL];
|
||||
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
||||
assert(dictMatchL < dictEnd);
|
||||
|
||||
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
||||
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} }
|
||||
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
|
||||
|
||||
if (matchIndexS > prefixLowestIndex) {
|
||||
/* check prefix short match */
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
if (idxl0 > prefixLowestIndex) {
|
||||
/* check prefix long match */
|
||||
if (MEM_read64(matchl0) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
|
||||
offset = (U32)(ip-matchl0);
|
||||
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
/* check dictMatchState short match */
|
||||
U32 const dictMatchIndexS = dictHashSmall[dictHS];
|
||||
match = dictBase + dictMatchIndexS;
|
||||
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
||||
|
||||
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
} }
|
||||
idxl1 = hashLong[hl1];
|
||||
matchl1 = base + idxl1;
|
||||
|
||||
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip+256);
|
||||
#endif
|
||||
continue;
|
||||
if (idxs0 > prefixLowestIndex) {
|
||||
/* check prefix short match */
|
||||
if (MEM_read32(matchs0) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
}
|
||||
}
|
||||
|
||||
if (ip1 >= nextStep) {
|
||||
PREFETCH_L1(ip1 + 64);
|
||||
PREFETCH_L1(ip1 + 128);
|
||||
step++;
|
||||
nextStep += kStepIncr;
|
||||
}
|
||||
ip = ip1;
|
||||
ip1 += step;
|
||||
|
||||
hl0 = hl1;
|
||||
idxl0 = idxl1;
|
||||
matchl0 = matchl1;
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip+256);
|
||||
#endif
|
||||
} while (ip1 <= ilimit);
|
||||
|
||||
_cleanup:
|
||||
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
||||
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
||||
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
||||
|
||||
/* Return the last literals size */
|
||||
return (size_t)(iend - anchor);
|
||||
|
||||
_search_next_long:
|
||||
|
||||
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
||||
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
||||
U32 const matchIndexL3 = hashLong[hl3];
|
||||
const BYTE* matchL3 = base + matchIndexL3;
|
||||
hashLong[hl3] = curr + 1;
|
||||
|
||||
/* check prefix long +1 match */
|
||||
if (matchIndexL3 > prefixLowestIndex) {
|
||||
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
|
||||
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
|
||||
ip++;
|
||||
offset = (U32)(ip-matchL3);
|
||||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
/* check dict long +1 match */
|
||||
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
|
||||
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
||||
assert(dictMatchL3 < dictEnd);
|
||||
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
||||
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
||||
ip++;
|
||||
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} } }
|
||||
/* check prefix long +1 match */
|
||||
if (idxl1 > prefixLowestIndex) {
|
||||
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
|
||||
ip = ip1;
|
||||
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
|
||||
offset = (U32)(ip-matchl1);
|
||||
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
}
|
||||
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
||||
offset = (U32)(curr - matchIndexS);
|
||||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
} else {
|
||||
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
||||
offset = (U32)(ip - match);
|
||||
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
}
|
||||
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
|
||||
offset = (U32)(ip - matchs0);
|
||||
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
|
||||
|
||||
/* fall-through */
|
||||
|
||||
_match_found:
|
||||
_match_found: /* requires ip, offset, mLength */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
if (step < 4) {
|
||||
/* It is unsafe to write this value back to the hashtable when ip1 is
|
||||
* greater than or equal to the new ip we will have after we're done
|
||||
* processing this match. Rather than perform that test directly
|
||||
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
|
||||
* more predictable test. The minmatch even if we take a short match is
|
||||
* 4 bytes, so as long as step, the distance between ip and ip1
|
||||
* (initially) is less than 4, we know ip1 < new ip. */
|
||||
hashLong[hl1] = (U32)(ip1 - base);
|
||||
}
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
_match_stored:
|
||||
/* match found */
|
||||
@@ -268,53 +286,268 @@ _match_stored:
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
||||
&& repIndex2 < prefixLowestIndex ?
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
} }
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
|
||||
ip += rLength;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
||||
ip += rLength;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls /* template */)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32* const hashLong = ms->hashTable;
|
||||
const U32 hBitsL = cParams->hashLog;
|
||||
U32* const hashSmall = ms->chainTable;
|
||||
const U32 hBitsS = cParams->chainLog;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const BYTE* ip = istart;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
||||
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
||||
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
||||
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
|
||||
const U32* const dictHashLong = dms->hashTable;
|
||||
const U32* const dictHashSmall = dms->chainTable;
|
||||
const U32 dictStartIndex = dms->window.dictLimit;
|
||||
const BYTE* const dictBase = dms->window.base;
|
||||
const BYTE* const dictStart = dictBase + dictStartIndex;
|
||||
const BYTE* const dictEnd = dms->window.nextSrc;
|
||||
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
|
||||
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
|
||||
|
||||
/* if a dictionary is attached, it must be within window range */
|
||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||
|
||||
if (ms->prefetchCDictTables) {
|
||||
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
||||
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
|
||||
PREFETCH_AREA(dictHashLong, hashTableBytes)
|
||||
PREFETCH_AREA(dictHashSmall, chainTableBytes)
|
||||
}
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
size_t mLength;
|
||||
U32 offset;
|
||||
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
||||
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
||||
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
||||
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
|
||||
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
|
||||
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
|
||||
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const matchIndexL = hashLong[h2];
|
||||
U32 matchIndexS = hashSmall[h];
|
||||
const BYTE* matchLong = base + matchIndexL;
|
||||
const BYTE* match = base + matchIndexS;
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
||||
|
||||
/* check repcode */
|
||||
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
if (matchIndexL > prefixLowestIndex) {
|
||||
/* check prefix long match */
|
||||
if (MEM_read64(matchLong) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
|
||||
offset = (U32)(ip-matchLong);
|
||||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictTagsMatchL) {
|
||||
/* check dictMatchState long match */
|
||||
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
||||
assert(dictMatchL < dictEnd);
|
||||
|
||||
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
||||
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} }
|
||||
|
||||
if (matchIndexS > prefixLowestIndex) {
|
||||
/* check prefix short match */
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
}
|
||||
} else if (dictTagsMatchS) {
|
||||
/* check dictMatchState short match */
|
||||
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
match = dictBase + dictMatchIndexS;
|
||||
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
||||
|
||||
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
} }
|
||||
|
||||
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip+256);
|
||||
#endif
|
||||
continue;
|
||||
|
||||
_search_next_long:
|
||||
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
||||
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
||||
U32 const matchIndexL3 = hashLong[hl3];
|
||||
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
||||
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
|
||||
const BYTE* matchL3 = base + matchIndexL3;
|
||||
hashLong[hl3] = curr + 1;
|
||||
|
||||
/* check prefix long +1 match */
|
||||
if (matchIndexL3 > prefixLowestIndex) {
|
||||
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
|
||||
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
|
||||
ip++;
|
||||
offset = (U32)(ip-matchL3);
|
||||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictTagsMatchL3) {
|
||||
/* check dict long +1 match */
|
||||
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
||||
assert(dictMatchL3 < dictEnd);
|
||||
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
||||
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
||||
ip++;
|
||||
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} } }
|
||||
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
if (matchIndexS < prefixLowestIndex) {
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
||||
offset = (U32)(curr - matchIndexS);
|
||||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
} else {
|
||||
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
||||
offset = (U32)(ip - match);
|
||||
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
}
|
||||
|
||||
_match_found:
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
_match_stored:
|
||||
/* match found */
|
||||
ip += mLength;
|
||||
anchor = ip;
|
||||
|
||||
if (ip <= ilimit) {
|
||||
/* Complementary insertion */
|
||||
/* done after iLimit test, as candidates could be > iend-8 */
|
||||
{ U32 const indexToInsert = curr+2;
|
||||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
||||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
||||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
||||
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* while (ip < ilimit) */
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
rep[0] = offset_1;
|
||||
rep[1] = offset_2;
|
||||
|
||||
/* Return the last literals size */
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
|
||||
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
||||
void const* src, size_t srcSize) \
|
||||
{ \
|
||||
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
|
||||
}
|
||||
|
||||
ZSTD_GEN_DFAST_FN(noDict, 4)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 5)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 6)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 7)
|
||||
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@@ -325,13 +558,13 @@ size_t ZSTD_compressBlock_doubleFast(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -345,13 +578,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -387,7 +620,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
|
||||
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
|
||||
if (prefixStartIndex == dictStartIndex)
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
|
||||
|
||||
/* Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
||||
@@ -409,12 +642,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
||||
|
||||
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
||||
& (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
||||
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
||||
} else {
|
||||
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
||||
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
||||
@@ -425,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
||||
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
||||
@@ -450,7 +683,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
}
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
} else {
|
||||
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
||||
@@ -477,12 +710,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
||||
& (offset_2 < current2 - dictStartIndex))
|
||||
& (offset_2 <= current2 - dictStartIndex))
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
@@ -500,6 +733,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
ZSTD_GEN_DFAST_FN(extDict, 4)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 5)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 6)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 7)
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@@ -510,12 +747,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -19,7 +19,8 @@ extern "C" {
|
||||
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
|
||||
|
||||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
|
||||
ZSTD_tableFillPurpose_e tfp);
|
||||
size_t ZSTD_compressBlock_doubleFast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -19,7 +19,8 @@ extern "C" {
|
||||
#include "zstd_compress_internal.h"
|
||||
|
||||
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
|
||||
ZSTD_tableFillPurpose_e tfp);
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -25,6 +25,8 @@ extern "C" {
|
||||
*/
|
||||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
||||
|
||||
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
|
||||
|
||||
@@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
|
||||
size_t ZSTD_compressBlock_btlazy2_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -159,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
|
||||
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
|
||||
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
|
||||
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
|
||||
return params.enableLdm ? totalSize : 0;
|
||||
return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
|
||||
{
|
||||
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
|
||||
return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_getBucket() :
|
||||
@@ -242,11 +242,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
|
||||
switch(ms->cParams.strategy)
|
||||
{
|
||||
case ZSTD_fast:
|
||||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
|
||||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
|
||||
break;
|
||||
|
||||
case ZSTD_dfast:
|
||||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
|
||||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
|
||||
break;
|
||||
|
||||
case ZSTD_greedy:
|
||||
@@ -478,7 +478,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
*/
|
||||
if (anchor > ip + hashed) {
|
||||
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
|
||||
/* Continue the outter loop at anchor (ip + hashed == anchor). */
|
||||
/* Continue the outer loop at anchor (ip + hashed == anchor). */
|
||||
ip = anchor - hashed;
|
||||
break;
|
||||
}
|
||||
@@ -549,7 +549,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
* the window through early invalidation.
|
||||
* TODO: * Test the chunk size.
|
||||
* * Try invalidation after the sequence generation and test the
|
||||
* the offset against maxDist directly.
|
||||
* offset against maxDist directly.
|
||||
*
|
||||
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
||||
* that any offset used is valid at the END of the sequence, since it may
|
||||
@@ -579,7 +579,9 @@ size_t ZSTD_ldm_generateSequences(
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
|
||||
void
|
||||
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
|
||||
{
|
||||
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
|
||||
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
|
||||
if (srcSize <= seq->litLength) {
|
||||
@@ -657,7 +659,7 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
|
||||
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
|
||||
ZSTD_paramSwitch_e useRowMatchFinder,
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
@@ -709,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
rep[0] = sequence.offset;
|
||||
/* Store the sequence */
|
||||
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
|
||||
sequence.offset + ZSTD_REP_MOVE,
|
||||
sequence.matchLength - MINMATCH);
|
||||
OFFSET_TO_OFFBASE(sequence.offset),
|
||||
sequence.matchLength);
|
||||
ip += sequence.matchLength;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -66,7 +66,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
*/
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
|
||||
ZSTD_paramSwitch_e useRowMatchFinder,
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -11,7 +11,10 @@
|
||||
#ifndef ZSTD_LDM_GEARTAB_H
|
||||
#define ZSTD_LDM_GEARTAB_H
|
||||
|
||||
static U64 ZSTD_ldm_gearTab[256] = {
|
||||
#include "../common/compiler.h" /* UNUSED_ATTR */
|
||||
#include "../common/mem.h" /* U64 */
|
||||
|
||||
static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
|
||||
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
|
||||
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
|
||||
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
|
||||
/* ====== Dependencies ====== */
|
||||
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
|
||||
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
|
||||
#include "../common/mem.h" /* MEM_STATIC */
|
||||
#include "../common/pool.h" /* threadpool */
|
||||
@@ -102,9 +103,8 @@ typedef struct ZSTDMT_bufferPool_s {
|
||||
buffer_t bTable[1]; /* variable size */
|
||||
} ZSTDMT_bufferPool;
|
||||
|
||||
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
|
||||
{
|
||||
unsigned const maxNbBuffers = 2*nbWorkers + 3;
|
||||
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
|
||||
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
|
||||
if (bufPool==NULL) return NULL;
|
||||
@@ -160,9 +160,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
|
||||
}
|
||||
|
||||
|
||||
static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
|
||||
static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
|
||||
{
|
||||
unsigned const maxNbBuffers = 2*nbWorkers + 3;
|
||||
if (srcBufPool==NULL) return NULL;
|
||||
if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
|
||||
return srcBufPool;
|
||||
@@ -171,7 +170,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
|
||||
size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
|
||||
ZSTDMT_bufferPool* newBufPool;
|
||||
ZSTDMT_freeBufferPool(srcBufPool);
|
||||
newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
|
||||
newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
|
||||
if (newBufPool==NULL) return newBufPool;
|
||||
ZSTDMT_setBufferSize(newBufPool, bSize);
|
||||
return newBufPool;
|
||||
@@ -263,6 +262,16 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
|
||||
ZSTD_customFree(buf.start, bufPool->cMem);
|
||||
}
|
||||
|
||||
/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
|
||||
* The 3 additional buffers are as follows:
|
||||
* 1 buffer for input loading
|
||||
* 1 buffer for "next input" when submitting current one
|
||||
* 1 buffer stuck in queue */
|
||||
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
|
||||
|
||||
/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
|
||||
* So we only need one seq buffer per worker. */
|
||||
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
|
||||
|
||||
/* ===== Seq Pool Wrapper ====== */
|
||||
|
||||
@@ -316,7 +325,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
|
||||
|
||||
static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
{
|
||||
ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
|
||||
ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
|
||||
if (seqPool == NULL) return NULL;
|
||||
ZSTDMT_setNbSeq(seqPool, 0);
|
||||
return seqPool;
|
||||
@@ -329,7 +338,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
|
||||
|
||||
static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
|
||||
{
|
||||
return ZSTDMT_expandBufferPool(pool, nbWorkers);
|
||||
return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
|
||||
}
|
||||
|
||||
|
||||
@@ -467,7 +476,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
/* Adjust parameters */
|
||||
if (params.ldmParams.enableLdm) {
|
||||
if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
|
||||
ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams);
|
||||
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
|
||||
@@ -478,7 +487,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
serialState->nextJobID = 0;
|
||||
if (params.fParams.checksumFlag)
|
||||
XXH64_reset(&serialState->xxhState, 0);
|
||||
if (params.ldmParams.enableLdm) {
|
||||
if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
ZSTD_customMem cMem = params.customMem;
|
||||
unsigned const hashLog = params.ldmParams.hashLog;
|
||||
size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
|
||||
@@ -564,7 +573,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
|
||||
/* A future job may error and skip our job */
|
||||
if (serialState->nextJobID == jobID) {
|
||||
/* It is now our turn, do any processing necessary */
|
||||
if (serialState->params.ldmParams.enableLdm) {
|
||||
if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
size_t error;
|
||||
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
|
||||
seqStore.size == 0 && seqStore.capacity > 0);
|
||||
@@ -594,7 +603,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
|
||||
if (seqStore.size > 0) {
|
||||
size_t const err = ZSTD_referenceExternalSequences(
|
||||
jobCCtx, seqStore.seq, seqStore.size);
|
||||
assert(serialState->params.ldmParams.enableLdm);
|
||||
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
|
||||
assert(!ZSTD_isError(err));
|
||||
(void)err;
|
||||
}
|
||||
@@ -672,7 +681,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
|
||||
job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
|
||||
}
|
||||
if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
|
||||
if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
|
||||
JOB_ERROR(ERROR(memory_allocation));
|
||||
|
||||
/* Don't compute the checksum for chunks, since we compute it externally,
|
||||
@@ -680,7 +689,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
*/
|
||||
if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
|
||||
/* Don't run LDM for the chunks, since we handle it externally */
|
||||
jobParams.ldmParams.enableLdm = 0;
|
||||
jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
|
||||
/* Correct nbWorkers to 0. */
|
||||
jobParams.nbWorkers = 0;
|
||||
|
||||
@@ -711,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
|
||||
|
||||
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
|
||||
size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
|
||||
size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
|
||||
if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
|
||||
DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
|
||||
ZSTD_invalidateRepCodes(cctx);
|
||||
@@ -729,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
|
||||
assert(job->cSize == 0);
|
||||
for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
|
||||
size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
|
||||
size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
|
||||
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
|
||||
ip += chunkSize;
|
||||
op += cSize; assert(op < oend);
|
||||
@@ -749,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
|
||||
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
|
||||
size_t const cSize = (job->lastJob) ?
|
||||
ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
|
||||
ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
|
||||
ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
|
||||
ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
|
||||
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
|
||||
lastCBlockSize = cSize;
|
||||
} }
|
||||
@@ -807,6 +816,15 @@ typedef struct {
|
||||
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
|
||||
|
||||
#define RSYNC_LENGTH 32
|
||||
/* Don't create chunks smaller than the zstd block size.
|
||||
* This stops us from regressing compression ratio too much,
|
||||
* and ensures our output fits in ZSTD_compressBound().
|
||||
*
|
||||
* If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
|
||||
* ZSTD_COMPRESSBOUND() will need to be updated.
|
||||
*/
|
||||
#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
|
||||
#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
|
||||
|
||||
typedef struct {
|
||||
U64 hash;
|
||||
@@ -927,7 +945,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
|
||||
mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
|
||||
assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
|
||||
mtctx->jobIDMask = nbJobs - 1;
|
||||
mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
|
||||
mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
|
||||
mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
|
||||
mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
|
||||
initError = ZSTDMT_serialState_init(&mtctx->serial);
|
||||
@@ -1030,7 +1048,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
||||
{
|
||||
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
||||
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
|
||||
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
||||
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
|
||||
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
||||
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
||||
if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
|
||||
@@ -1135,7 +1153,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
||||
static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
|
||||
{
|
||||
unsigned jobLog;
|
||||
if (params->ldmParams.enableLdm) {
|
||||
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
/* In Long Range Mode, the windowLog is typically oversized.
|
||||
* In which case, it's preferable to determine the jobSize
|
||||
* based on cycleLog instead. */
|
||||
@@ -1179,7 +1197,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
|
||||
int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
|
||||
int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
|
||||
assert(0 <= overlapRLog && overlapRLog <= 8);
|
||||
if (params->ldmParams.enableLdm) {
|
||||
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
/* In Long Range Mode, the windowLog is typically oversized.
|
||||
* In which case, it's preferable to determine the jobSize
|
||||
* based on chainLog instead.
|
||||
@@ -1252,6 +1270,9 @@ size_t ZSTDMT_initCStream_internal(
|
||||
/* Aim for the targetsectionSize as the average job size. */
|
||||
U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
|
||||
U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
|
||||
/* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
|
||||
* expected job size is at least 4x larger. */
|
||||
assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
|
||||
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
|
||||
mtctx->rsync.hash = 0;
|
||||
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
|
||||
@@ -1263,7 +1284,7 @@ size_t ZSTDMT_initCStream_internal(
|
||||
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
|
||||
{
|
||||
/* If ldm is enabled we need windowSize space. */
|
||||
size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
|
||||
size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
|
||||
/* Two buffers of slack, plus extra space for the overlap
|
||||
* This is the minimum slack that LDM works with. One extra because
|
||||
* flush might waste up to targetSectionSize-1 bytes. Another extra
|
||||
@@ -1538,17 +1559,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
|
||||
static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
|
||||
{
|
||||
BYTE const* const bufferStart = (BYTE const*)buffer.start;
|
||||
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
||||
BYTE const* const rangeStart = (BYTE const*)range.start;
|
||||
BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
|
||||
|
||||
if (rangeStart == NULL || bufferStart == NULL)
|
||||
return 0;
|
||||
/* Empty ranges cannot overlap */
|
||||
if (bufferStart == bufferEnd || rangeStart == rangeEnd)
|
||||
return 0;
|
||||
|
||||
return bufferStart < rangeEnd && rangeStart < bufferEnd;
|
||||
{
|
||||
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
||||
BYTE const* const rangeEnd = rangeStart + range.size;
|
||||
|
||||
/* Empty ranges cannot overlap */
|
||||
if (bufferStart == bufferEnd || rangeStart == rangeEnd)
|
||||
return 0;
|
||||
|
||||
return bufferStart < rangeEnd && rangeStart < bufferEnd;
|
||||
}
|
||||
}
|
||||
|
||||
static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
|
||||
@@ -1575,7 +1600,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
|
||||
|
||||
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
|
||||
{
|
||||
if (mtctx->params.ldmParams.enableLdm) {
|
||||
if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
||||
ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
|
||||
DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
|
||||
DEBUGLOG(5, "source [0x%zx, 0x%zx)",
|
||||
@@ -1678,6 +1703,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
||||
if (!mtctx->params.rsyncable)
|
||||
/* Rsync is disabled. */
|
||||
return syncPoint;
|
||||
if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
|
||||
/* We don't emit synchronization points if it would produce too small blocks.
|
||||
* We don't have enough input to find a synchronization point, so don't look.
|
||||
*/
|
||||
return syncPoint;
|
||||
if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
|
||||
/* Not enough to compute the hash.
|
||||
* We will miss any synchronization points in this RSYNC_LENGTH byte
|
||||
@@ -1688,10 +1718,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
||||
*/
|
||||
return syncPoint;
|
||||
/* Initialize the loop variables. */
|
||||
if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
|
||||
/* We have enough bytes buffered to initialize the hash.
|
||||
if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
|
||||
/* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
|
||||
* because they can't possibly be a sync point. So we can start
|
||||
* part way through the input buffer.
|
||||
*/
|
||||
pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
|
||||
if (pos >= RSYNC_LENGTH) {
|
||||
prev = istart + pos - RSYNC_LENGTH;
|
||||
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
||||
} else {
|
||||
assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
|
||||
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
||||
hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
|
||||
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
||||
}
|
||||
} else {
|
||||
/* We have enough bytes buffered to initialize the hash,
|
||||
* and have processed enough bytes to find a sync point.
|
||||
* Start scanning at the beginning of the input.
|
||||
*/
|
||||
assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
|
||||
assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
|
||||
pos = 0;
|
||||
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
||||
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
||||
@@ -1705,16 +1753,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
||||
syncPoint.flush = 1;
|
||||
return syncPoint;
|
||||
}
|
||||
} else {
|
||||
/* We don't have enough bytes buffered to initialize the hash, but
|
||||
* we know we have at least RSYNC_LENGTH bytes total.
|
||||
* Start scanning after the first RSYNC_LENGTH bytes less the bytes
|
||||
* already buffered.
|
||||
*/
|
||||
pos = RSYNC_LENGTH - mtctx->inBuff.filled;
|
||||
prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
|
||||
hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
|
||||
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
||||
}
|
||||
/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
|
||||
* through the input. If we hit a synchronization point, then cut the
|
||||
@@ -1724,16 +1762,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
||||
* then a block will be emitted anyways, but this is okay, since if we
|
||||
* are already synchronized we will remain synchronized.
|
||||
*/
|
||||
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
||||
for (; pos < syncPoint.toLoad; ++pos) {
|
||||
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
|
||||
/* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
|
||||
/* This assert is very expensive, and Debian compiles with asserts enabled.
|
||||
* So disable it for now. We can get similar coverage by checking it at the
|
||||
* beginning & end of the loop.
|
||||
* assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
||||
*/
|
||||
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
|
||||
assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
|
||||
if ((hash & hitMask) == hitMask) {
|
||||
syncPoint.toLoad = pos + 1;
|
||||
syncPoint.flush = 1;
|
||||
++pos; /* for assert */
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
||||
return syncPoint;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -65,8 +65,11 @@ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
* Private use only. Init streaming operation.
|
||||
* expects params to be valid.
|
||||
* must receive dict, or cdict, or none, but not both.
|
||||
* mtctx can be freshly constructed or reused from a prior compression.
|
||||
* If mtctx is reused, memory allocations from the prior compression may not be freed,
|
||||
* even if they are not needed for the current compression.
|
||||
* @return : 0, or an error code */
|
||||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
||||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -14,12 +14,12 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
||||
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "../common/cpu.h" /* bmi2 */
|
||||
#include "../common/mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "../common/fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "../common/huf.h"
|
||||
#include "zstd_decompress_internal.h"
|
||||
#include "zstd_ddict.h"
|
||||
@@ -134,7 +134,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
||||
ZSTD_memcpy(internalBuffer, dict, dictSize);
|
||||
}
|
||||
ddict->dictSize = dictSize;
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
|
||||
|
||||
/* parse dictionary content */
|
||||
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
|
||||
@@ -240,5 +240,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
|
||||
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
|
||||
{
|
||||
if (ddict==NULL) return 0;
|
||||
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
|
||||
return ddict->dictID;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -55,18 +55,18 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
|
||||
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "../common/cpu.h" /* bmi2 */
|
||||
#include "../common/mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "../common/fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "../common/huf.h"
|
||||
#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
|
||||
#include "../common/zstd_internal.h" /* blockProperties_t */
|
||||
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
||||
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
||||
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
|
||||
#include "../common/bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
# include "../legacy/zstd_legacy.h"
|
||||
@@ -79,11 +79,11 @@
|
||||
*************************************/
|
||||
|
||||
#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
|
||||
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
|
||||
* Currently, that means a 0.75 load factor.
|
||||
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
|
||||
* the load factor of the ddict hash set.
|
||||
*/
|
||||
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
|
||||
* Currently, that means a 0.75 load factor.
|
||||
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
|
||||
* the load factor of the ddict hash set.
|
||||
*/
|
||||
|
||||
#define DDICT_HASHSET_TABLE_BASE_SIZE 64
|
||||
#define DDICT_HASHSET_RESIZE_FACTOR 2
|
||||
@@ -177,12 +177,15 @@ static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet,
|
||||
static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
|
||||
ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
|
||||
DEBUGLOG(4, "Allocating new hash set");
|
||||
if (!ret)
|
||||
return NULL;
|
||||
ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
|
||||
ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
|
||||
ret->ddictPtrCount = 0;
|
||||
if (!ret || !ret->ddictPtrTable) {
|
||||
if (!ret->ddictPtrTable) {
|
||||
ZSTD_customFree(ret, customMem);
|
||||
return NULL;
|
||||
}
|
||||
ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
|
||||
ret->ddictPtrCount = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -241,6 +244,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
|
||||
dctx->outBufferMode = ZSTD_bm_buffered;
|
||||
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
|
||||
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
|
||||
dctx->disableHufAsm = 0;
|
||||
}
|
||||
|
||||
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
@@ -255,11 +259,15 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
dctx->inBuffSize = 0;
|
||||
dctx->outBuffSize = 0;
|
||||
dctx->streamStage = zdss_init;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
dctx->legacyContext = NULL;
|
||||
dctx->previousLegacyVersion = 0;
|
||||
#endif
|
||||
dctx->noForwardProgress = 0;
|
||||
dctx->oversizedDuration = 0;
|
||||
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
||||
#if DYNAMIC_BMI2
|
||||
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
|
||||
#endif
|
||||
dctx->ddictSet = NULL;
|
||||
ZSTD_DCtx_resetParameters(dctx);
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
@@ -280,8 +288,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
|
||||
return dctx;
|
||||
}
|
||||
|
||||
ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
|
||||
{
|
||||
static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
|
||||
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
||||
|
||||
{ ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
|
||||
@@ -292,10 +299,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
|
||||
}
|
||||
}
|
||||
|
||||
ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
|
||||
{
|
||||
return ZSTD_createDCtx_internal(customMem);
|
||||
}
|
||||
|
||||
ZSTD_DCtx* ZSTD_createDCtx(void)
|
||||
{
|
||||
DEBUGLOG(3, "ZSTD_createDCtx");
|
||||
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
|
||||
return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
static void ZSTD_clearDict(ZSTD_DCtx* dctx)
|
||||
@@ -380,6 +392,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*! ZSTD_isSkippableFrame() :
|
||||
* Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
|
||||
* Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
|
||||
*/
|
||||
unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
|
||||
{
|
||||
if (size < ZSTD_FRAMEIDSIZE) return 0;
|
||||
{ U32 const magic = MEM_readLE32(buffer);
|
||||
if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** ZSTD_frameHeaderSize_internal() :
|
||||
* srcSize must be large enough to reach header size fields.
|
||||
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
|
||||
@@ -415,16 +440,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
|
||||
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
|
||||
* @return : 0, `zfhPtr` is correctly filled,
|
||||
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
|
||||
* or an error code, which can be tested using ZSTD_isError() */
|
||||
** or an error code, which can be tested using ZSTD_isError() */
|
||||
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t const minInputSize = ZSTD_startingInputLength(format);
|
||||
|
||||
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
|
||||
if (srcSize < minInputSize) return minInputSize;
|
||||
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
|
||||
DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
|
||||
|
||||
if (srcSize > 0) {
|
||||
/* note : technically could be considered an assert(), since it's an invalid entry */
|
||||
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
|
||||
}
|
||||
if (srcSize < minInputSize) {
|
||||
if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
|
||||
/* when receiving less than @minInputSize bytes,
|
||||
* control these bytes at least correspond to a supported magic number
|
||||
* in order to error out early if they don't.
|
||||
**/
|
||||
size_t const toCopy = MIN(4, srcSize);
|
||||
unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
|
||||
assert(src != NULL);
|
||||
ZSTD_memcpy(hbuf, src, toCopy);
|
||||
if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
|
||||
/* not a zstd frame : let's check if it's a skippable frame */
|
||||
MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
|
||||
ZSTD_memcpy(hbuf, src, toCopy);
|
||||
if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
RETURN_ERROR(prefix_unknown,
|
||||
"first bytes don't correspond to any supported magic number");
|
||||
} } }
|
||||
return minInputSize;
|
||||
}
|
||||
|
||||
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
|
||||
if ( (format != ZSTD_f_zstd1_magicless)
|
||||
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
|
||||
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
@@ -466,7 +515,9 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
}
|
||||
switch(dictIDSizeCode)
|
||||
{
|
||||
default: assert(0); /* impossible */
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
ZSTD_FALLTHROUGH;
|
||||
case 0 : break;
|
||||
case 1 : dictID = ip[pos]; pos++; break;
|
||||
case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
|
||||
@@ -474,7 +525,9 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
}
|
||||
switch(fcsID)
|
||||
{
|
||||
default: assert(0); /* impossible */
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
ZSTD_FALLTHROUGH;
|
||||
case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
|
||||
case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
|
||||
case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
|
||||
@@ -503,7 +556,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
|
||||
return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_getFrameContentSize() :
|
||||
* compatible with legacy mode
|
||||
* @return : decompressed size of the single frame pointed to be `src` if known, otherwise
|
||||
@@ -537,18 +589,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
|
||||
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
|
||||
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
|
||||
frameParameter_unsupported, "");
|
||||
{
|
||||
size_t const skippableSize = skippableHeaderSize + sizeU32;
|
||||
{ size_t const skippableSize = skippableHeaderSize + sizeU32;
|
||||
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
|
||||
return skippableSize;
|
||||
}
|
||||
}
|
||||
|
||||
/*! ZSTD_readSkippableFrame() :
|
||||
* Retrieves content of a skippable frame, and writes it to dst buffer.
|
||||
*
|
||||
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
|
||||
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
|
||||
* in the magicVariant.
|
||||
*
|
||||
* Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
|
||||
*
|
||||
* @return : number of bytes written or a ZSTD error.
|
||||
*/
|
||||
size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
|
||||
unsigned* magicVariant, /* optional, can be NULL */
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
|
||||
|
||||
{ U32 const magicNumber = MEM_readLE32(src);
|
||||
size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
|
||||
size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
|
||||
|
||||
/* check input validity */
|
||||
RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
|
||||
RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
|
||||
RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
|
||||
|
||||
/* deliver payload */
|
||||
if (skippableContentSize > 0 && dst != NULL)
|
||||
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
|
||||
if (magicVariant != NULL)
|
||||
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
|
||||
return skippableContentSize;
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_findDecompressedSize() :
|
||||
* compatible with legacy mode
|
||||
* `srcSize` must be the exact length of some number of ZSTD compressed and/or
|
||||
* skippable frames
|
||||
* @return : decompressed size of the frames contained */
|
||||
* note: compatible with legacy mode
|
||||
* @return : decompressed size of the frames contained */
|
||||
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned long long totalDstSize = 0;
|
||||
@@ -558,9 +644,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
|
||||
|
||||
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
||||
if (ZSTD_isError(skippableSize)) {
|
||||
return ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
|
||||
assert(skippableSize <= srcSize);
|
||||
|
||||
src = (const BYTE *)src + skippableSize;
|
||||
@@ -568,17 +652,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
|
||||
continue;
|
||||
}
|
||||
|
||||
{ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
|
||||
if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
|
||||
{ unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
|
||||
if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
|
||||
|
||||
/* check for overflow */
|
||||
if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
|
||||
totalDstSize += ret;
|
||||
if (totalDstSize + fcs < totalDstSize)
|
||||
return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
|
||||
totalDstSize += fcs;
|
||||
}
|
||||
/* skip to next frame */
|
||||
{ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
|
||||
if (ZSTD_isError(frameSrcSize)) {
|
||||
return ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
|
||||
assert(frameSrcSize <= srcSize);
|
||||
|
||||
src = (const BYTE *)src + frameSrcSize;
|
||||
srcSize -= frameSrcSize;
|
||||
@@ -700,10 +784,11 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
|
||||
ip += 4;
|
||||
}
|
||||
|
||||
frameSizeInfo.nbBlocks = nbBlocks;
|
||||
frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
|
||||
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
? zfh.frameContentSize
|
||||
: nbBlocks * zfh.blockSizeMax;
|
||||
: (unsigned long long)nbBlocks * zfh.blockSizeMax;
|
||||
return frameSizeInfo;
|
||||
}
|
||||
}
|
||||
@@ -743,6 +828,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
|
||||
return bound;
|
||||
}
|
||||
|
||||
size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
|
||||
{
|
||||
size_t margin = 0;
|
||||
unsigned maxBlockSize = 0;
|
||||
|
||||
/* Iterate over each frame */
|
||||
while (srcSize > 0) {
|
||||
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
|
||||
size_t const compressedSize = frameSizeInfo.compressedSize;
|
||||
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
|
||||
ZSTD_frameHeader zfh;
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
|
||||
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
|
||||
return ERROR(corruption_detected);
|
||||
|
||||
if (zfh.frameType == ZSTD_frame) {
|
||||
/* Add the frame header to our margin */
|
||||
margin += zfh.headerSize;
|
||||
/* Add the checksum to our margin */
|
||||
margin += zfh.checksumFlag ? 4 : 0;
|
||||
/* Add 3 bytes per block */
|
||||
margin += 3 * frameSizeInfo.nbBlocks;
|
||||
|
||||
/* Compute the max block size */
|
||||
maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
|
||||
} else {
|
||||
assert(zfh.frameType == ZSTD_skippableFrame);
|
||||
/* Add the entire skippable frame size to our margin. */
|
||||
margin += compressedSize;
|
||||
}
|
||||
|
||||
assert(srcSize >= compressedSize);
|
||||
src = (const BYTE*)src + compressedSize;
|
||||
srcSize -= compressedSize;
|
||||
}
|
||||
|
||||
/* Add the max block size back to the margin. */
|
||||
margin += maxBlockSize;
|
||||
|
||||
return margin;
|
||||
}
|
||||
|
||||
/*-*************************************************************
|
||||
* Frame decoding
|
||||
@@ -768,7 +895,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
|
||||
if (srcSize == 0) return 0;
|
||||
RETURN_ERROR(dstBuffer_null, "");
|
||||
}
|
||||
ZSTD_memcpy(dst, src, srcSize);
|
||||
ZSTD_memmove(dst, src, srcSize);
|
||||
return srcSize;
|
||||
}
|
||||
|
||||
@@ -846,6 +973,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
|
||||
/* Loop on each block */
|
||||
while (1) {
|
||||
BYTE* oBlockEnd = oend;
|
||||
size_t decodedSize;
|
||||
blockProperties_t blockProperties;
|
||||
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
|
||||
@@ -855,16 +983,34 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
remainingSrcSize -= ZSTD_blockHeaderSize;
|
||||
RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
|
||||
|
||||
if (ip >= op && ip < oBlockEnd) {
|
||||
/* We are decompressing in-place. Limit the output pointer so that we
|
||||
* don't overwrite the block that we are currently reading. This will
|
||||
* fail decompression if the input & output pointers aren't spaced
|
||||
* far enough apart.
|
||||
*
|
||||
* This is important to set, even when the pointers are far enough
|
||||
* apart, because ZSTD_decompressBlock_internal() can decide to store
|
||||
* literals in the output buffer, after the block it is decompressing.
|
||||
* Since we don't want anything to overwrite our input, we have to tell
|
||||
* ZSTD_decompressBlock_internal to never write past ip.
|
||||
*
|
||||
* See ZSTD_allocateLiteralsBuffer() for reference.
|
||||
*/
|
||||
oBlockEnd = op + (ip - op);
|
||||
}
|
||||
|
||||
switch(blockProperties.blockType)
|
||||
{
|
||||
case bt_compressed:
|
||||
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
|
||||
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
|
||||
break;
|
||||
case bt_raw :
|
||||
/* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
|
||||
decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
|
||||
break;
|
||||
case bt_rle :
|
||||
decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
|
||||
decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize);
|
||||
break;
|
||||
case bt_reserved :
|
||||
default:
|
||||
@@ -899,6 +1045,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
}
|
||||
ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
|
||||
/* Allow caller to get size read */
|
||||
DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
|
||||
*srcPtr = ip;
|
||||
*srcSizePtr = remainingSrcSize;
|
||||
return (size_t)(op-ostart);
|
||||
@@ -945,17 +1092,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
}
|
||||
#endif
|
||||
|
||||
{ U32 const magicNumber = MEM_readLE32(src);
|
||||
DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
|
||||
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
|
||||
if (srcSize >= 4) {
|
||||
U32 const magicNumber = MEM_readLE32(src);
|
||||
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
|
||||
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
/* skippable frame detected : skip it */
|
||||
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
||||
FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
|
||||
FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
|
||||
assert(skippableSize <= srcSize);
|
||||
|
||||
src = (const BYTE *)src + skippableSize;
|
||||
srcSize -= skippableSize;
|
||||
continue;
|
||||
continue; /* check next frame */
|
||||
} }
|
||||
|
||||
if (ddict) {
|
||||
@@ -1009,7 +1157,7 @@ static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
|
||||
switch (dctx->dictUses) {
|
||||
default:
|
||||
assert(0 /* Impossible */);
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
case ZSTD_dont_use:
|
||||
ZSTD_clearDict(dctx);
|
||||
return NULL;
|
||||
@@ -1031,7 +1179,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
|
||||
{
|
||||
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
|
||||
size_t regenSize;
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem);
|
||||
RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
|
||||
regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
@@ -1051,8 +1199,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
|
||||
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
|
||||
|
||||
/**
|
||||
* Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
|
||||
* we allow taking a partial block as the input. Currently only raw uncompressed blocks can
|
||||
* Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
|
||||
* allow taking a partial block as the input. Currently only raw uncompressed blocks can
|
||||
* be streamed.
|
||||
*
|
||||
* For blocks that can be streamed, this allows us to reduce the latency until we produce
|
||||
@@ -1065,7 +1213,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t
|
||||
return dctx->expected;
|
||||
if (dctx->bType != bt_raw)
|
||||
return dctx->expected;
|
||||
return MIN(MAX(inputSize, 1), dctx->expected);
|
||||
return BOUNDED(1, inputSize, dctx->expected);
|
||||
}
|
||||
|
||||
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
|
||||
@@ -1073,7 +1221,9 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
|
||||
{
|
||||
default: /* should not happen */
|
||||
assert(0);
|
||||
ZSTD_FALLTHROUGH;
|
||||
case ZSTDds_getFrameHeaderSize:
|
||||
ZSTD_FALLTHROUGH;
|
||||
case ZSTDds_decodeFrameHeader:
|
||||
return ZSTDnit_frameHeader;
|
||||
case ZSTDds_decodeBlockHeader:
|
||||
@@ -1085,6 +1235,7 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
|
||||
case ZSTDds_checkChecksum:
|
||||
return ZSTDnit_checksum;
|
||||
case ZSTDds_decodeSkippableHeader:
|
||||
ZSTD_FALLTHROUGH;
|
||||
case ZSTDds_skipFrame:
|
||||
return ZSTDnit_skippableFrame;
|
||||
}
|
||||
@@ -1168,7 +1319,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
{
|
||||
case bt_compressed:
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
|
||||
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
|
||||
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
|
||||
dctx->expected = 0; /* Streaming not supported */
|
||||
break;
|
||||
case bt_raw :
|
||||
@@ -1249,7 +1400,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
|
||||
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1290,11 +1441,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
/* in minimal huffman, we always use X1 variants */
|
||||
size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
|
||||
dictPtr, dictEnd - dictPtr,
|
||||
workspace, workspaceSize);
|
||||
workspace, workspaceSize, /* flags */ 0);
|
||||
#else
|
||||
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
|
||||
dictPtr, (size_t)(dictEnd - dictPtr),
|
||||
workspace, workspaceSize);
|
||||
workspace, workspaceSize, /* flags */ 0);
|
||||
#endif
|
||||
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
|
||||
dictPtr += hSize;
|
||||
@@ -1393,7 +1544,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
|
||||
dctx->prefixStart = NULL;
|
||||
dctx->virtualStart = NULL;
|
||||
dctx->dictEnd = NULL;
|
||||
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
|
||||
dctx->litEntropy = dctx->fseEntropy = 0;
|
||||
dctx->dictID = 0;
|
||||
dctx->bType = bt_reserved;
|
||||
@@ -1455,7 +1606,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
|
||||
* This could for one of the following reasons :
|
||||
* - The frame does not require a dictionary (most common case).
|
||||
* - The frame was built with dictID intentionally removed.
|
||||
* Needed dictionary is a hidden information.
|
||||
* Needed dictionary is a hidden piece of information.
|
||||
* Note : this use case also happens when using a non-conformant dictionary.
|
||||
* - `srcSize` is too small, and as a result, frame header could not be decoded.
|
||||
* Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
|
||||
@@ -1464,7 +1615,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
|
||||
* ZSTD_getFrameHeader(), which will provide a more precise error code. */
|
||||
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
|
||||
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
|
||||
size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
|
||||
if (ZSTD_isError(hError)) return 0;
|
||||
return zfp.dictID;
|
||||
@@ -1493,7 +1644,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
|
||||
ZSTD_DStream* ZSTD_createDStream(void)
|
||||
{
|
||||
DEBUGLOG(3, "ZSTD_createDStream");
|
||||
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
|
||||
return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
|
||||
@@ -1503,7 +1654,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
|
||||
|
||||
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
|
||||
{
|
||||
return ZSTD_createDCtx_advanced(customMem);
|
||||
return ZSTD_createDCtx_internal(customMem);
|
||||
}
|
||||
|
||||
size_t ZSTD_freeDStream(ZSTD_DStream* zds)
|
||||
@@ -1571,7 +1722,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
|
||||
size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_initDStream");
|
||||
return ZSTD_initDStream_usingDDict(zds, NULL);
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
|
||||
return ZSTD_startingInputLength(zds->format);
|
||||
}
|
||||
|
||||
/* ZSTD_initDStream_usingDDict() :
|
||||
@@ -1579,6 +1732,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||
* this function cannot fail */
|
||||
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
|
||||
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
|
||||
return ZSTD_startingInputLength(dctx->format);
|
||||
@@ -1589,6 +1743,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
|
||||
* this function cannot fail */
|
||||
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_resetDStream");
|
||||
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
|
||||
return ZSTD_startingInputLength(dctx->format);
|
||||
}
|
||||
@@ -1660,6 +1815,11 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
|
||||
bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
|
||||
bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
|
||||
return bounds;
|
||||
case ZSTD_d_disableHuffmanAssembly:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = 1;
|
||||
return bounds;
|
||||
|
||||
default:;
|
||||
}
|
||||
bounds.error = ERROR(parameter_unsupported);
|
||||
@@ -1700,6 +1860,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
|
||||
case ZSTD_d_refMultipleDDicts:
|
||||
*value = (int)dctx->refMultipleDDicts;
|
||||
return 0;
|
||||
case ZSTD_d_disableHuffmanAssembly:
|
||||
*value = (int)dctx->disableHufAsm;
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
RETURN_ERROR(parameter_unsupported, "");
|
||||
@@ -1733,6 +1896,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
|
||||
}
|
||||
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
|
||||
return 0;
|
||||
case ZSTD_d_disableHuffmanAssembly:
|
||||
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
|
||||
dctx->disableHufAsm = value != 0;
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
RETURN_ERROR(parameter_unsupported, "");
|
||||
@@ -1763,7 +1930,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
|
||||
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
|
||||
{
|
||||
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
|
||||
unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
|
||||
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
|
||||
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
|
||||
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
|
||||
size_t const minRBSize = (size_t) neededSize;
|
||||
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
|
||||
@@ -1897,10 +2065,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
DEBUGLOG(5, "stage zdss_init => transparent reset ");
|
||||
zds->streamStage = zdss_loadHeader;
|
||||
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
zds->legacyVersion = 0;
|
||||
#endif
|
||||
zds->hostageByte = 0;
|
||||
zds->expectedOutBuffer = *output;
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case zdss_loadHeader :
|
||||
DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
|
||||
@@ -1917,7 +2087,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if (zds->refMultipleDDicts && zds->ddictSet) {
|
||||
ZSTD_DCtx_selectFrameDDict(zds);
|
||||
}
|
||||
DEBUGLOG(5, "header size : %u", (U32)hSize);
|
||||
if (ZSTD_isError(hSize)) {
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
|
||||
@@ -1949,6 +2118,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
zds->lhSize += remainingInput;
|
||||
}
|
||||
input->pos = input->size;
|
||||
/* check first few bytes */
|
||||
FORWARD_IF_ERROR(
|
||||
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
|
||||
"First few bytes detected incorrect" );
|
||||
/* return hint input size */
|
||||
return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
|
||||
}
|
||||
assert(ip != NULL);
|
||||
@@ -1966,8 +2140,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
|
||||
if (ZSTD_isError(decompressedSize)) return decompressedSize;
|
||||
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
|
||||
assert(istart != NULL);
|
||||
ip = istart + cSize;
|
||||
op += decompressedSize;
|
||||
op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
|
||||
zds->expected = 0;
|
||||
zds->streamStage = zdss_init;
|
||||
someMoreWork = 0;
|
||||
@@ -2038,7 +2213,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
zds->outBuffSize = neededOutBuffSize;
|
||||
} } }
|
||||
zds->streamStage = zdss_read;
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case zdss_read:
|
||||
DEBUGLOG(5, "stage zdss_read");
|
||||
@@ -2051,13 +2226,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
}
|
||||
if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
|
||||
FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
|
||||
assert(ip != NULL);
|
||||
ip += neededInSize;
|
||||
/* Function modifies the stage so we must break */
|
||||
break;
|
||||
} }
|
||||
if (ip==iend) { someMoreWork = 0; break; } /* no more input */
|
||||
zds->streamStage = zdss_load;
|
||||
/* fall-through */
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case zdss_load:
|
||||
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
|
||||
@@ -2065,7 +2241,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
int const isSkipFrame = ZSTD_isSkipFrame(zds);
|
||||
size_t loadedSize;
|
||||
/* At this point we shouldn't be decompressing a block that we can stream. */
|
||||
assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
|
||||
assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
|
||||
if (isSkipFrame) {
|
||||
loadedSize = MIN(toLoad, (size_t)(iend-ip));
|
||||
} else {
|
||||
@@ -2074,8 +2250,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
"should never happen");
|
||||
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
|
||||
}
|
||||
ip += loadedSize;
|
||||
zds->inPos += loadedSize;
|
||||
if (loadedSize != 0) {
|
||||
/* ip may be NULL */
|
||||
ip += loadedSize;
|
||||
zds->inPos += loadedSize;
|
||||
}
|
||||
if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
|
||||
|
||||
/* decode loaded input */
|
||||
@@ -2085,14 +2264,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
break;
|
||||
}
|
||||
case zdss_flush:
|
||||
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
|
||||
{
|
||||
size_t const toFlushSize = zds->outEnd - zds->outStart;
|
||||
size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
|
||||
op += flushedSize;
|
||||
|
||||
op = op ? op + flushedSize : op;
|
||||
|
||||
zds->outStart += flushedSize;
|
||||
if (flushedSize == toFlushSize) { /* flush completed */
|
||||
zds->streamStage = zdss_read;
|
||||
if ( (zds->outBuffSize < zds->fParams.frameContentSize)
|
||||
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
|
||||
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
|
||||
DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
|
||||
(int)(zds->outBuffSize - zds->outStart),
|
||||
(U32)zds->fParams.blockSizeMax);
|
||||
@@ -2106,7 +2288,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
default:
|
||||
assert(0); /* impossible */
|
||||
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
|
||||
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
|
||||
} }
|
||||
|
||||
/* result */
|
||||
@@ -2119,8 +2301,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if ((ip==istart) && (op==ostart)) { /* no forward progress */
|
||||
zds->noForwardProgress ++;
|
||||
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
|
||||
RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
|
||||
RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
|
||||
RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
|
||||
RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
|
||||
assert(0);
|
||||
}
|
||||
} else {
|
||||
@@ -2157,11 +2339,17 @@ size_t ZSTD_decompressStream_simpleArgs (
|
||||
void* dst, size_t dstCapacity, size_t* dstPos,
|
||||
const void* src, size_t srcSize, size_t* srcPos)
|
||||
{
|
||||
ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
|
||||
ZSTD_inBuffer input = { src, srcSize, *srcPos };
|
||||
/* ZSTD_compress_generic() will check validity of dstPos and srcPos */
|
||||
size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
|
||||
*dstPos = output.pos;
|
||||
*srcPos = input.pos;
|
||||
return cErr;
|
||||
ZSTD_outBuffer output;
|
||||
ZSTD_inBuffer input;
|
||||
output.dst = dst;
|
||||
output.size = dstCapacity;
|
||||
output.pos = *dstPos;
|
||||
input.src = src;
|
||||
input.size = srcSize;
|
||||
input.pos = *srcPos;
|
||||
{ size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
|
||||
*dstPos = output.pos;
|
||||
*srcPos = input.pos;
|
||||
return cErr;
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -33,6 +33,12 @@
|
||||
*/
|
||||
|
||||
|
||||
/* Streaming state is used to inform allocation of the literal buffer */
|
||||
typedef enum {
|
||||
not_streaming = 0,
|
||||
is_streaming = 1
|
||||
} streaming_operation;
|
||||
|
||||
/* ZSTD_decompressBlock_internal() :
|
||||
* decompress block, starting at `src`,
|
||||
* into destination buffer `dst`.
|
||||
@@ -41,7 +47,7 @@
|
||||
*/
|
||||
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize, const int frame);
|
||||
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
|
||||
|
||||
/* ZSTD_buildFSETable() :
|
||||
* generate FSE decoding table for one symbol (ll, ml or off)
|
||||
@@ -54,9 +60,14 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
*/
|
||||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
const U32* baseValue, const U8* nbAdditionalBits,
|
||||
unsigned tableLog, void* wksp, size_t wkspSize,
|
||||
int bmi2);
|
||||
|
||||
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
|
||||
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
|
||||
#endif /* ZSTD_DEC_BLOCK_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -20,7 +20,7 @@
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include "../common/mem.h" /* BYTE, U16, U32 */
|
||||
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
||||
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
|
||||
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
||||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
||||
|
||||
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
||||
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
||||
|
||||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
||||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
||||
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
|
||||
|
||||
typedef struct {
|
||||
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
||||
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
||||
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
|
||||
U32 rep[ZSTD_REP_NUM];
|
||||
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
||||
} ZSTD_entropyDTables_t;
|
||||
@@ -106,6 +107,22 @@ typedef struct {
|
||||
size_t ddictPtrCount;
|
||||
} ZSTD_DDictHashSet;
|
||||
|
||||
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
|
||||
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
|
||||
#endif
|
||||
|
||||
#define ZSTD_LBMIN 64
|
||||
#define ZSTD_LBMAX (128 << 10)
|
||||
|
||||
/* extra buffer, compensates when dst is not large enough to store litBuffer */
|
||||
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
|
||||
|
||||
typedef enum {
|
||||
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
|
||||
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
|
||||
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
|
||||
} ZSTD_litLocation_e;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
const ZSTD_seqSymbol* LLTptr;
|
||||
@@ -136,7 +153,9 @@ struct ZSTD_DCtx_s
|
||||
size_t litSize;
|
||||
size_t rleSize;
|
||||
size_t staticSize;
|
||||
#if DYNAMIC_BMI2 != 0
|
||||
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
||||
#endif
|
||||
|
||||
/* dictionary */
|
||||
ZSTD_DDict* ddictLocal;
|
||||
@@ -146,6 +165,7 @@ struct ZSTD_DCtx_s
|
||||
ZSTD_dictUses_e dictUses;
|
||||
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
||||
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
||||
int disableHufAsm;
|
||||
|
||||
/* streaming */
|
||||
ZSTD_dStreamStage streamStage;
|
||||
@@ -158,16 +178,21 @@ struct ZSTD_DCtx_s
|
||||
size_t outStart;
|
||||
size_t outEnd;
|
||||
size_t lhSize;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
void* legacyContext;
|
||||
U32 previousLegacyVersion;
|
||||
U32 legacyVersion;
|
||||
#endif
|
||||
U32 hostageByte;
|
||||
int noForwardProgress;
|
||||
ZSTD_bufferMode_e outBufferMode;
|
||||
ZSTD_outBuffer expectedOutBuffer;
|
||||
|
||||
/* workspace */
|
||||
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
||||
BYTE* litBuffer;
|
||||
const BYTE* litBufferEnd;
|
||||
ZSTD_litLocation_e litBufferLocation;
|
||||
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
|
||||
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
||||
|
||||
size_t oversizedDuration;
|
||||
@@ -183,6 +208,14 @@ struct ZSTD_DCtx_s
|
||||
#endif
|
||||
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
||||
|
||||
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
|
||||
#if DYNAMIC_BMI2 != 0
|
||||
return dctx->bmi2;
|
||||
#else
|
||||
(void)dctx;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*-*******************************************************
|
||||
* Shared internal functions
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -15,6 +15,7 @@
|
||||
***************************************/
|
||||
#define ZBUFF_STATIC_LINKING_ONLY
|
||||
#include "zbuff.h"
|
||||
#include "../common/error_private.h"
|
||||
|
||||
|
||||
/*-***********************************************************
|
||||
@@ -73,13 +74,32 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
||||
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
||||
{
|
||||
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
|
||||
return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
|
||||
}
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), "");
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), "");
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), "");
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel);
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), "");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -13,6 +13,8 @@
|
||||
/* *************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* suppress warning on ZSTD_initDStream_usingDict */
|
||||
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
||||
#define ZBUFF_STATIC_LINKING_ONLY
|
||||
#include "zbuff.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -34,12 +34,20 @@
|
||||
#include "../common/pool.h"
|
||||
#include "../common/threading.h"
|
||||
#include "../common/zstd_internal.h" /* includes zstd.h */
|
||||
#include "../common/bits.h" /* ZSTD_highbit32 */
|
||||
#include "../zdict.h"
|
||||
#include "cover.h"
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/**
|
||||
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
||||
* on 64bit builds.
|
||||
* For 32bit builds we choose 1 GB.
|
||||
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
||||
* contiguous buffer, so 1GB is already a high limit.
|
||||
*/
|
||||
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
||||
#define COVER_DEFAULT_SPLITPOINT 1.0
|
||||
|
||||
@@ -47,7 +55,7 @@
|
||||
* Console display
|
||||
***************************************/
|
||||
#ifndef LOCALDISPLAYLEVEL
|
||||
static int g_displayLevel = 2;
|
||||
static int g_displayLevel = 0;
|
||||
#endif
|
||||
#undef DISPLAY
|
||||
#define DISPLAY(...) \
|
||||
@@ -534,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
||||
|
||||
/**
|
||||
* Prepare a context for dictionary building.
|
||||
* The context is only dependent on the parameter `d` and can used multiple
|
||||
* The context is only dependent on the parameter `d` and can be used multiple
|
||||
* times.
|
||||
* Returns 0 on success or error code on error.
|
||||
* The context must be destroyed with `COVER_ctx_destroy()`.
|
||||
@@ -639,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
||||
|
||||
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
||||
{
|
||||
const double ratio = (double)nbDmers / maxDictSize;
|
||||
const double ratio = (double)nbDmers / (double)maxDictSize;
|
||||
if (ratio >= 10) {
|
||||
return;
|
||||
}
|
||||
@@ -735,7 +743,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
COVER_map_t activeDmers;
|
||||
parameters.splitPoint = 1.0;
|
||||
/* Initialize global data */
|
||||
g_displayLevel = parameters.zParams.notificationLevel;
|
||||
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
||||
/* Checks */
|
||||
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
||||
@@ -943,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
||||
}
|
||||
}
|
||||
|
||||
static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
|
||||
{
|
||||
COVER_dictSelection_t ds;
|
||||
ds.dictContent = buf;
|
||||
ds.dictSize = s;
|
||||
ds.totalCompressedSize = csz;
|
||||
return ds;
|
||||
}
|
||||
|
||||
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
||||
COVER_dictSelection_t selection = { NULL, 0, error };
|
||||
return selection;
|
||||
return setDictSelection(NULL, 0, error);
|
||||
}
|
||||
|
||||
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
||||
@@ -998,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
||||
}
|
||||
|
||||
if (params.shrinkDict == 0) {
|
||||
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
||||
free(candidateDictBuffer);
|
||||
return selection;
|
||||
return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
|
||||
}
|
||||
|
||||
largestDict = dictContentSize;
|
||||
@@ -1032,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
||||
return COVER_dictSelectionError(totalCompressedSize);
|
||||
}
|
||||
|
||||
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
||||
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
||||
if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
|
||||
free(largestDictbuffer);
|
||||
return selection;
|
||||
return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
|
||||
}
|
||||
dictContentSize *= 2;
|
||||
}
|
||||
dictContentSize = largestDict;
|
||||
totalCompressedSize = largestCompressed;
|
||||
{
|
||||
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
||||
free(candidateDictBuffer);
|
||||
return selection;
|
||||
}
|
||||
free(candidateDictBuffer);
|
||||
return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -32,6 +32,13 @@
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/**
|
||||
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
||||
* on 64bit builds.
|
||||
* For 32bit builds we choose 1 GB.
|
||||
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
||||
* contiguous buffer, so 1GB is already a high limit.
|
||||
*/
|
||||
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
||||
#define FASTCOVER_MAX_F 31
|
||||
#define FASTCOVER_MAX_ACCEL 10
|
||||
@@ -44,7 +51,7 @@
|
||||
* Console display
|
||||
***************************************/
|
||||
#ifndef LOCALDISPLAYLEVEL
|
||||
static int g_displayLevel = 2;
|
||||
static int g_displayLevel = 0;
|
||||
#endif
|
||||
#undef DISPLAY
|
||||
#define DISPLAY(...) \
|
||||
@@ -297,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
||||
|
||||
/**
|
||||
* Prepare a context for dictionary building.
|
||||
* The context is only dependent on the parameter `d` and can used multiple
|
||||
* The context is only dependent on the parameter `d` and can be used multiple
|
||||
* times.
|
||||
* Returns 0 on success or error code on error.
|
||||
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
||||
@@ -549,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
||||
ZDICT_cover_params_t coverParams;
|
||||
FASTCOVER_accel_t accelParams;
|
||||
/* Initialize global data */
|
||||
g_displayLevel = parameters.zParams.notificationLevel;
|
||||
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
||||
/* Assign splitPoint and f if not provided */
|
||||
parameters.splitPoint = 1.0;
|
||||
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
||||
@@ -632,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
||||
const unsigned shrinkDict = 0;
|
||||
/* Local variables */
|
||||
const int displayLevel = parameters->zParams.notificationLevel;
|
||||
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
||||
unsigned iteration = 1;
|
||||
unsigned d;
|
||||
unsigned k;
|
||||
@@ -716,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
data->parameters.splitPoint = splitPoint;
|
||||
data->parameters.steps = kSteps;
|
||||
data->parameters.shrinkDict = shrinkDict;
|
||||
data->parameters.zParams.notificationLevel = g_displayLevel;
|
||||
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
||||
/* Check the parameters */
|
||||
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
||||
data->ctx->f, accel)) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -44,7 +44,6 @@
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
# define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
|
||||
#include "../common/mem.h" /* read */
|
||||
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
||||
@@ -54,6 +53,7 @@
|
||||
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
||||
#include "../zdict.h"
|
||||
#include "divsufsort.h"
|
||||
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@@ -130,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
**********************************************************/
|
||||
static unsigned ZDICT_NbCommonBytes (size_t val)
|
||||
{
|
||||
if (MEM_isLittleEndian()) {
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
_BitScanForward64( &r, (U64)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_ctzll((U64)val) >> 3);
|
||||
# else
|
||||
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
||||
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r=0;
|
||||
_BitScanForward( &r, (U32)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_ctz((U32)val) >> 3);
|
||||
# else
|
||||
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
||||
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
||||
# endif
|
||||
}
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse64( &r, val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clzll(val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
||||
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
||||
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
||||
r += (!val);
|
||||
return r;
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse( &r, (unsigned long)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clz((U32)val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
||||
r += (!val);
|
||||
return r;
|
||||
# endif
|
||||
} }
|
||||
}
|
||||
|
||||
|
||||
/*! ZDICT_count() :
|
||||
Count the nb of common bytes between 2 pointers.
|
||||
Note : this function presumes end of buffer followed by noisy guard band.
|
||||
@@ -203,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
||||
pMatch = (const char*)pMatch+sizeof(size_t);
|
||||
continue;
|
||||
}
|
||||
pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
|
||||
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
||||
return (size_t)((const char*)pIn - pStart);
|
||||
}
|
||||
}
|
||||
@@ -235,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
||||
U32 savings[LLIMIT] = {0};
|
||||
const BYTE* b = (const BYTE*)buffer;
|
||||
size_t maxLength = LLIMIT;
|
||||
size_t pos = suffix[start];
|
||||
size_t pos = (size_t)suffix[start];
|
||||
U32 end = start;
|
||||
dictItem solution;
|
||||
|
||||
@@ -369,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
||||
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
||||
|
||||
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
||||
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
||||
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
||||
|
||||
solution.pos = (U32)pos;
|
||||
solution.length = (U32)maxLength;
|
||||
@@ -379,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
||||
{ U32 id;
|
||||
for (id=start; id<end; id++) {
|
||||
U32 p, pEnd, length;
|
||||
U32 const testedPos = suffix[id];
|
||||
U32 const testedPos = (U32)suffix[id];
|
||||
if (testedPos == pos)
|
||||
length = solution.length;
|
||||
else {
|
||||
@@ -431,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
||||
elt = table[u];
|
||||
/* sort : improve rank */
|
||||
while ((u>1) && (table[u-1].savings < elt.savings))
|
||||
table[u] = table[u-1], u--;
|
||||
table[u] = table[u-1], u--;
|
||||
table[u] = elt;
|
||||
return u;
|
||||
} }
|
||||
@@ -442,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
||||
|
||||
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
||||
/* append */
|
||||
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
||||
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
||||
table[u].savings += elt.length / 8; /* rough approx bonus */
|
||||
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
||||
table[u].length += addedLength;
|
||||
@@ -582,7 +523,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
||||
if (solution.length==0) { cursor++; continue; }
|
||||
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
||||
cursor += solution.length;
|
||||
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
||||
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
||||
} }
|
||||
|
||||
_cleanup:
|
||||
@@ -625,11 +566,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
||||
size_t cSize;
|
||||
|
||||
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
||||
{ size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
|
||||
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
||||
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
||||
|
||||
}
|
||||
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
||||
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
||||
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
||||
|
||||
if (cSize) { /* if == 0; block is not compressible */
|
||||
@@ -662,8 +603,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
||||
|
||||
if (nbSeq >= 2) { /* rep offsets */
|
||||
const seqDef* const seq = seqStorePtr->sequencesStart;
|
||||
U32 offset1 = seq[0].offset - 3;
|
||||
U32 offset2 = seq[1].offset - 3;
|
||||
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
||||
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
||||
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
||||
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
||||
repOffsets[offset1] += 3;
|
||||
@@ -734,6 +675,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
||||
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
||||
BYTE* dstPtr = (BYTE*)dstBuffer;
|
||||
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
||||
|
||||
/* init */
|
||||
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
||||
@@ -766,8 +708,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
pos += fileSizes[u];
|
||||
}
|
||||
|
||||
if (notificationLevel >= 4) {
|
||||
/* writeStats */
|
||||
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
||||
for (u=0; u<=offcodeMax; u++) {
|
||||
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
||||
} }
|
||||
|
||||
/* analyze, build stats, starting with literals */
|
||||
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
||||
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
||||
if (HUF_isError(maxNbBits)) {
|
||||
eSize = maxNbBits;
|
||||
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
||||
@@ -776,7 +725,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
||||
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
||||
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
||||
maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
||||
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
||||
assert(maxNbBits==9);
|
||||
}
|
||||
huffLog = (U32)maxNbBits;
|
||||
@@ -817,7 +766,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
llLog = (U32)errorCode;
|
||||
|
||||
/* write result to buffer */
|
||||
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
||||
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
||||
if (HUF_isError(hhSize)) {
|
||||
eSize = hhSize;
|
||||
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
||||
@@ -872,7 +821,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
||||
#else
|
||||
/* at this stage, we don't use the result of "most common first offset",
|
||||
as the impact of statistics is not properly evaluated */
|
||||
* as the impact of statistics is not properly evaluated */
|
||||
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
||||
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
||||
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
||||
@@ -888,6 +837,17 @@ _cleanup:
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @returns the maximum repcode value
|
||||
*/
|
||||
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
||||
{
|
||||
U32 maxRep = reps[0];
|
||||
int r;
|
||||
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
||||
maxRep = MAX(maxRep, reps[r]);
|
||||
return maxRep;
|
||||
}
|
||||
|
||||
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* customDictContent, size_t dictContentSize,
|
||||
@@ -899,11 +859,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
||||
BYTE header[HBUFFSIZE];
|
||||
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
||||
U32 const notificationLevel = params.notificationLevel;
|
||||
/* The final dictionary content must be at least as large as the largest repcode */
|
||||
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
||||
size_t paddingSize;
|
||||
|
||||
/* check conditions */
|
||||
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
||||
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
||||
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
||||
|
||||
/* dictionary header */
|
||||
@@ -927,12 +889,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
||||
hSize += eSize;
|
||||
}
|
||||
|
||||
/* copy elements in final buffer ; note : src and dst buffer can overlap */
|
||||
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
|
||||
{ size_t const dictSize = hSize + dictContentSize;
|
||||
char* dictEnd = (char*)dictBuffer + dictSize;
|
||||
memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
|
||||
memcpy(dictBuffer, header, hSize);
|
||||
/* Shrink the content size if it doesn't fit in the buffer */
|
||||
if (hSize + dictContentSize > dictBufferCapacity) {
|
||||
dictContentSize = dictBufferCapacity - hSize;
|
||||
}
|
||||
|
||||
/* Pad the dictionary content with zeros if it is too small */
|
||||
if (dictContentSize < minContentSize) {
|
||||
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
||||
"dictBufferCapacity too small to fit max repcode");
|
||||
paddingSize = minContentSize - dictContentSize;
|
||||
} else {
|
||||
paddingSize = 0;
|
||||
}
|
||||
|
||||
{
|
||||
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
||||
|
||||
/* The dictionary consists of the header, optional padding, and the content.
|
||||
* The padding comes before the content because the "best" position in the
|
||||
* dictionary is the last byte.
|
||||
*/
|
||||
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
||||
BYTE* const outDictPadding = outDictHeader + hSize;
|
||||
BYTE* const outDictContent = outDictPadding + paddingSize;
|
||||
|
||||
assert(dictSize <= dictBufferCapacity);
|
||||
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
||||
|
||||
/* First copy the customDictContent into its final location.
|
||||
* `customDictContent` and `dictBuffer` may overlap, so we must
|
||||
* do this before any other writes into the output buffer.
|
||||
* Then copy the header & padding into the output buffer.
|
||||
*/
|
||||
memmove(outDictContent, customDictContent, dictContentSize);
|
||||
memcpy(outDictHeader, header, hSize);
|
||||
memset(outDictPadding, 0, paddingSize);
|
||||
|
||||
return dictSize;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -8,32 +8,43 @@
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef DICTBUILDER_H_001
|
||||
#define DICTBUILDER_H_001
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_ZDICT_H
|
||||
#define ZSTD_ZDICT_H
|
||||
|
||||
/*====== Dependencies ======*/
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
||||
#ifndef ZDICTLIB_VISIBILITY
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
||||
#ifndef ZDICTLIB_VISIBLE
|
||||
/* Backwards compatibility with old macro name */
|
||||
# ifdef ZDICTLIB_VISIBILITY
|
||||
# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
|
||||
# else
|
||||
# define ZDICTLIB_VISIBILITY
|
||||
# define ZDICTLIB_VISIBLE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ZDICTLIB_HIDDEN
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
|
||||
# else
|
||||
# define ZDICTLIB_HIDDEN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
||||
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
|
||||
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
|
||||
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
||||
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
#else
|
||||
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
||||
# define ZDICTLIB_API ZDICTLIB_VISIBLE
|
||||
#endif
|
||||
|
||||
/*******************************************************************************
|
||||
@@ -46,7 +57,7 @@ extern "C" {
|
||||
*
|
||||
* Zstd can use dictionaries to improve compression ratio of small data.
|
||||
* Traditionally small files don't compress well because there is very little
|
||||
* repetion in a single sample, since it is small. But, if you are compressing
|
||||
* repetition in a single sample, since it is small. But, if you are compressing
|
||||
* many similar files, like a bunch of JSON records that share the same
|
||||
* structure, you can train a dictionary on ahead of time on some samples of
|
||||
* these files. Then, zstd can use the dictionary to find repetitions that are
|
||||
@@ -110,8 +121,8 @@ extern "C" {
|
||||
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
||||
* dictionary larger than that. But, most use cases can get away with a
|
||||
* smaller dictionary. The advanced dictionary builders can automatically
|
||||
* shrink the dictionary for you, and select a the smallest size that
|
||||
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
||||
* shrink the dictionary for you, and select the smallest size that doesn't
|
||||
* hurt compression ratio too much. See the `shrinkDict` parameter.
|
||||
* A smaller dictionary can save memory, and potentially speed up
|
||||
* compression.
|
||||
*
|
||||
@@ -132,7 +143,7 @@ extern "C" {
|
||||
*
|
||||
* # Benchmark levels 1-3 without a dictionary
|
||||
* zstd -b1e3 -r /path/to/my/files
|
||||
* # Benchmark levels 1-3 with a dictioanry
|
||||
* # Benchmark levels 1-3 with a dictionary
|
||||
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
||||
*
|
||||
* When should I retrain a dictionary?
|
||||
@@ -201,9 +212,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
||||
const size_t* samplesSizes, unsigned nbSamples);
|
||||
|
||||
typedef struct {
|
||||
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
||||
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
||||
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
|
||||
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bits random value)
|
||||
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
||||
* You may use them in private settings, but be warned that they
|
||||
* may be used by zstd in a public dictionary registry in the future.
|
||||
@@ -237,7 +248,6 @@ typedef struct {
|
||||
* is presumed that the most profitable content is at the end of the dictionary,
|
||||
* since that is the cheapest to reference.
|
||||
*
|
||||
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
||||
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
|
||||
*
|
||||
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
||||
@@ -261,9 +271,21 @@ ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictS
|
||||
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
||||
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
||||
|
||||
#endif /* ZSTD_ZDICT_H */
|
||||
|
||||
#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
|
||||
#define ZSTD_ZDICT_H_STATIC
|
||||
|
||||
#ifdef ZDICT_STATIC_LINKING_ONLY
|
||||
/* This can be overridden externally to hide static symbols. */
|
||||
#ifndef ZDICTLIB_STATIC_API
|
||||
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
||||
# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
|
||||
# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
||||
# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
|
||||
# else
|
||||
# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* ====================================================================================
|
||||
* The definitions in this section are considered experimental.
|
||||
@@ -272,8 +294,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
||||
* Use them only in association with static linking.
|
||||
* ==================================================================================== */
|
||||
|
||||
#define ZDICT_CONTENTSIZE_MIN 128
|
||||
#define ZDICT_DICTSIZE_MIN 256
|
||||
/* Deprecated: Remove in v1.6.0 */
|
||||
#define ZDICT_CONTENTSIZE_MIN 128
|
||||
|
||||
/*! ZDICT_cover_params_t:
|
||||
* k and d are the only required parameters.
|
||||
@@ -318,7 +341,7 @@ typedef struct {
|
||||
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
|
||||
void *dictBuffer, size_t dictBufferCapacity,
|
||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_cover_params_t parameters);
|
||||
@@ -340,7 +363,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_cover_params_t* parameters);
|
||||
@@ -361,7 +384,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t parameters);
|
||||
@@ -384,7 +407,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
size_t dictBufferCapacity, const void* samplesBuffer,
|
||||
const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t* parameters);
|
||||
@@ -409,7 +432,7 @@ typedef struct {
|
||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
||||
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
|
||||
void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_legacy_params_t parameters);
|
||||
@@ -421,32 +444,31 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
||||
or _CRT_SECURE_NO_WARNINGS in Visual.
|
||||
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
||||
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
|
||||
# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
|
||||
#else
|
||||
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
||||
# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
|
||||
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
||||
# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif (ZDICT_GCC_VERSION >= 301)
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|
||||
# define ZDICT_DEPRECATED(message) __attribute__((deprecated))
|
||||
# elif defined(_MSC_VER)
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
|
||||
# define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
|
||||
# else
|
||||
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API
|
||||
# define ZDICT_DEPRECATED(message)
|
||||
# endif
|
||||
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
||||
ZDICTLIB_STATIC_API
|
||||
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
||||
|
||||
|
||||
#endif /* ZDICT_STATIC_LINKING_ONLY */
|
||||
#endif /* ZSTD_ZDICT_H_STATIC */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* DICTBUILDER_H_001 */
|
||||
|
||||
+744
-256
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
@@ -20,19 +20,31 @@ extern "C" {
|
||||
|
||||
|
||||
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
|
||||
#ifndef ZSTDERRORLIB_VISIBILITY
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
||||
#ifndef ZSTDERRORLIB_VISIBLE
|
||||
/* Backwards compatibility with old macro name */
|
||||
# ifdef ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
|
||||
# else
|
||||
# define ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_VISIBLE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ZSTDERRORLIB_HIDDEN
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
|
||||
# else
|
||||
# define ZSTDERRORLIB_HIDDEN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
||||
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
|
||||
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
||||
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
#else
|
||||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
|
||||
#endif
|
||||
|
||||
/*-*********************************************
|
||||
@@ -58,14 +70,17 @@ typedef enum {
|
||||
ZSTD_error_frameParameter_windowTooLarge = 16,
|
||||
ZSTD_error_corruption_detected = 20,
|
||||
ZSTD_error_checksum_wrong = 22,
|
||||
ZSTD_error_literals_headerWrong = 24,
|
||||
ZSTD_error_dictionary_corrupted = 30,
|
||||
ZSTD_error_dictionary_wrong = 32,
|
||||
ZSTD_error_dictionaryCreation_failed = 34,
|
||||
ZSTD_error_parameter_unsupported = 40,
|
||||
ZSTD_error_parameter_combination_unsupported = 41,
|
||||
ZSTD_error_parameter_outOfBound = 42,
|
||||
ZSTD_error_tableLog_tooLarge = 44,
|
||||
ZSTD_error_maxSymbolValue_tooLarge = 46,
|
||||
ZSTD_error_maxSymbolValue_tooSmall = 48,
|
||||
ZSTD_error_stabilityCondition_notRespected = 50,
|
||||
ZSTD_error_stage_wrong = 60,
|
||||
ZSTD_error_init_missing = 62,
|
||||
ZSTD_error_memory_allocation = 64,
|
||||
@@ -73,11 +88,15 @@ typedef enum {
|
||||
ZSTD_error_dstSize_tooSmall = 70,
|
||||
ZSTD_error_srcSize_wrong = 72,
|
||||
ZSTD_error_dstBuffer_null = 74,
|
||||
ZSTD_error_noForwardProgress_destFull = 80,
|
||||
ZSTD_error_noForwardProgress_inputEmpty = 82,
|
||||
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
|
||||
ZSTD_error_frameIndex_tooLarge = 100,
|
||||
ZSTD_error_seekableIO = 102,
|
||||
ZSTD_error_dstBuffer_wrong = 104,
|
||||
ZSTD_error_srcBuffer_wrong = 105,
|
||||
ZSTD_error_sequenceProducer_failed = 106,
|
||||
ZSTD_error_externalSequences_invalid = 107,
|
||||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
|
||||
} ZSTD_ErrorCode;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user