Add the native zstd dependency and impl

This commit is contained in:
Neil Macneale IV
2026-01-16 00:26:13 +00:00
parent f5b8c74807
commit 505bf9c7ca
3 changed files with 184 additions and 1 deletions

View File

@@ -72,6 +72,7 @@ require (
github.com/hashicorp/golang-lru/v2 v2.0.2
github.com/jmoiron/sqlx v1.3.4
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
github.com/klauspost/compress v1.18.0
github.com/kylelemons/godebug v1.1.0
github.com/lib/pq v1.10.0
github.com/mohae/uvarint v0.0.0-20160208145430-c3f9e62bf2b0
@@ -156,7 +157,6 @@ require (
github.com/googleapis/gax-go/v2 v2.14.2 // indirect
github.com/gorilla/mux v1.8.0 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/cpuid/v2 v2.0.12 // indirect
github.com/lestrrat-go/strftime v1.0.4 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect

View File

@@ -0,0 +1,111 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zstd
import (
"github.com/klauspost/compress/zstd"
)
// nativeDictEncoder wraps a zstd encoder configured with a dictionary.
// NewCDict stores a pointer to one of these in CDict.impl, and CompressDict
// type-asserts it back out before encoding.
type nativeDictEncoder struct {
// encoder is the klauspost encoder that was created with the dictionary.
encoder *zstd.Encoder
}
// nativeDictDecoder wraps a zstd decoder configured with a dictionary.
// NewDDict stores a pointer to one of these in DDict.impl, and DecompressDict
// type-asserts it back out before decoding.
type nativeDictDecoder struct {
// decoder is the klauspost decoder that was created with the dictionary.
decoder *zstd.Decoder
}
// NativeCompressor implements the Compressor interface using
// github.com/klauspost/compress/zstd. Construct it with NewNativeCompressor;
// the zero value has nil encoder and decoder fields.
type NativeCompressor struct {
// encoder handles dictionary-less compression in Compress.
encoder *zstd.Encoder
// decoder handles dictionary-less decompression in Decompress.
decoder *zstd.Decoder
}
// NewNativeCompressor builds a NativeCompressor backed by a dictionary-less
// klauspost zstd encoder and decoder. Both are created with a nil
// writer/reader because the methods of this type only use the one-shot
// EncodeAll and DecodeAll entry points. The first construction error
// encountered is returned unchanged.
func NewNativeCompressor() (*NativeCompressor, error) {
	enc, encErr := zstd.NewWriter(nil)
	if encErr != nil {
		return nil, encErr
	}

	dec, decErr := zstd.NewReader(nil)
	if decErr != nil {
		return nil, decErr
	}

	nc := new(NativeCompressor)
	nc.encoder = enc
	nc.decoder = dec
	return nc, nil
}
// Compress encodes src with the dictionary-less klauspost zstd encoder,
// passing dst as the destination buffer, and returns the slice produced by
// EncodeAll.
func (n *NativeCompressor) Compress(dst, src []byte) []byte {
	out := n.encoder.EncodeAll(src, dst)
	return out
}
// Decompress decodes src with the dictionary-less klauspost zstd decoder,
// passing dst as the destination buffer. The slice and error produced by
// DecodeAll are returned as-is.
func (n *NativeCompressor) Decompress(dst, src []byte) ([]byte, error) {
	out, err := n.decoder.DecodeAll(src, dst)
	return out, err
}
// CompressDict encodes src using the encoder held inside dict, passing dst as
// the destination buffer.
//
// It panics when dict (or its impl) is nil, and when dict was not produced by
// this implementation's NewCDict; the signature has no error return, so both
// cases are treated as programmer errors.
func (n *NativeCompressor) CompressDict(dst, src []byte, dict *CDict) []byte {
	if dict == nil || dict.impl == nil {
		panic("runtime error: nil dictionary passed to native compressor")
	}

	wrapped, isNative := dict.impl.(*nativeDictEncoder)
	if !isNative {
		panic("runtime error: invalid dictionary type for native compressor")
	}
	return wrapped.encoder.EncodeAll(src, dst)
}
// DecompressDict decodes src using the decoder held inside dict, passing dst
// as the destination buffer. The slice and error produced by DecodeAll are
// returned as-is.
//
// It panics when dict (or its impl) is nil, and when dict was not produced by
// this implementation's NewDDict.
func (n *NativeCompressor) DecompressDict(dst, src []byte, dict *DDict) ([]byte, error) {
	if dict == nil || dict.impl == nil {
		panic("runtime error: nil dictionary passed to native compressor")
	}

	wrapped, isNative := dict.impl.(*nativeDictDecoder)
	if !isNative {
		panic("runtime error: invalid dictionary type for native compressor")
	}
	return wrapped.decoder.DecodeAll(src, dst)
}
// NewCDict creates a compression dictionary from the raw dictionary bytes in
// dict. A dedicated klauspost encoder is configured with the dictionary and
// stored in the returned CDict for later use by CompressDict. Any error from
// constructing the encoder is returned unchanged.
func (n *NativeCompressor) NewCDict(dict []byte) (*CDict, error) {
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDict(dict))
	if err != nil {
		return nil, err
	}

	wrapper := &nativeDictEncoder{encoder: enc}
	return &CDict{impl: wrapper}, nil
}
// NewDDict creates a decompression dictionary from the raw dictionary bytes
// in dict. A dedicated klauspost decoder is configured with the dictionary and
// stored in the returned DDict for later use by DecompressDict. Any error from
// constructing the decoder is returned unchanged.
func (n *NativeCompressor) NewDDict(dict []byte) (*DDict, error) {
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dict))
	if err != nil {
		return nil, err
	}

	wrapper := &nativeDictDecoder{decoder: dec}
	return &DDict{impl: wrapper}, nil
}
// BuildDict is not supported by the native implementation and always panics;
// samples and dictSize are ignored.
//
// The authors found the klauspost/compress/zstd dictionary builder effectively
// unusable: it failed even with thousands of samples containing obvious
// repeated patterns. Build logic is expected to keep callers from ever
// reaching this method, so dictionaries should be built with the CGO
// implementation instead.
func (n *NativeCompressor) BuildDict(samples [][]byte, dictSize int) []byte {
	const unsupported = "runtime error: BuildDict not supported in native implementation - use CGO implementation"
	panic(unsupported)
}

View File

@@ -0,0 +1,72 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zstd
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestNativeCompressionRoundtrip verifies that data compressed by the native
// compressor without a dictionary decompresses back to the original bytes.
func TestNativeCompressionRoundtrip(t *testing.T) {
	nc, err := NewNativeCompressor()
	require.NoError(t, err)

	const message = "Hello, world! This is a test of native zstd compression."
	packed := nc.Compress(nil, []byte(message))

	unpacked, err := nc.Decompress(nil, packed)
	require.NoError(t, err)
	require.Equal(t, message, string(unpacked))
}
// TestNativeDictionaryCompressionRoundtrip verifies that the native compressor
// can compress and decompress with a dictionary that was built by the CGO
// implementation. The test is skipped when no dictionary could be built.
func TestNativeDictionaryCompressionRoundtrip(t *testing.T) {
	native, err := NewNativeCompressor()
	require.NoError(t, err)

	// The native BuildDict is unusable, so the CGO implementation trains the
	// dictionary that the native code then consumes.
	trainer := NewGozstdCompressor()
	corpus := [][]byte{
		[]byte("This is sample text with common words and phrases."),
		[]byte("Common words and phrases appear frequently."),
		[]byte("Sample text contains common patterns."),
	}
	rawDict := trainer.BuildDict(corpus, 256)
	if len(rawDict) == 0 {
		t.Skip("Could not build dictionary for testing")
	}

	const payload = "This is test data with common words that match the dictionary."

	// Load the CGO-built dictionary into both native dictionary wrappers.
	encDict, err := native.NewCDict(rawDict)
	require.NoError(t, err)
	decDict, err := native.NewDDict(rawDict)
	require.NoError(t, err)

	// Round-trip the payload through the native dictionary code paths.
	packed := native.CompressDict(nil, []byte(payload), encDict)
	unpacked, err := native.DecompressDict(nil, packed, decDict)
	require.NoError(t, err)

	// The decompressed bytes must match what went in.
	require.Equal(t, payload, string(unpacked))
}