From 505bf9c7ca624e8864f750ca2e8cdb69033a3744 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Fri, 16 Jan 2026 00:26:13 +0000 Subject: [PATCH] Add the native zstd dependency and impl --- go/go.mod | 2 +- go/store/nbs/zstd/native_impl.go | 111 ++++++++++++++++++++++++++ go/store/nbs/zstd/native_impl_test.go | 72 +++++++++++++++++ 3 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 go/store/nbs/zstd/native_impl.go create mode 100644 go/store/nbs/zstd/native_impl_test.go diff --git a/go/go.mod b/go/go.mod index 963d7c0a3a..40b12e2ea4 100644 --- a/go/go.mod +++ b/go/go.mod @@ -72,6 +72,7 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.2 github.com/jmoiron/sqlx v1.3.4 github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 + github.com/klauspost/compress v1.18.0 github.com/kylelemons/godebug v1.1.0 github.com/lib/pq v1.10.0 github.com/mohae/uvarint v0.0.0-20160208145430-c3f9e62bf2b0 @@ -156,7 +157,6 @@ require ( github.com/googleapis/gax-go/v2 v2.14.2 // indirect github.com/gorilla/mux v1.8.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect - github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/cpuid/v2 v2.0.12 // indirect github.com/lestrrat-go/strftime v1.0.4 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect diff --git a/go/store/nbs/zstd/native_impl.go b/go/store/nbs/zstd/native_impl.go new file mode 100644 index 0000000000..0d5a770fcb --- /dev/null +++ b/go/store/nbs/zstd/native_impl.go @@ -0,0 +1,111 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package zstd
+
+import (
+	"github.com/klauspost/compress/zstd"
+)
+
+// nativeDictEncoder holds a zstd encoder that was created with a dictionary attached.
+type nativeDictEncoder struct {
+	encoder *zstd.Encoder
+}
+
+// nativeDictDecoder holds a zstd decoder that was created with a dictionary registered.
+type nativeDictDecoder struct {
+	decoder *zstd.Decoder
+}
+
+// NativeCompressor implements the Compressor interface on top of github.com/klauspost/compress/zstd.
+type NativeCompressor struct {
+	encoder *zstd.Encoder
+	decoder *zstd.Decoder
+}
+
+// NewNativeCompressor returns a NativeCompressor holding one dictionary-less encoder and decoder.
+// An error from constructing either of them is returned unchanged.
+func NewNativeCompressor() (*NativeCompressor, error) {
+	enc, err := zstd.NewWriter(nil)
+	if err != nil {
+		return nil, err
+	}
+	dec, err := zstd.NewReader(nil)
+	if err != nil {
+		return nil, err
+	}
+	return &NativeCompressor{encoder: enc, decoder: dec}, nil
+}
+
+// Compress runs src through the compressor's own encoder, passing dst as the destination buffer.
+func (n *NativeCompressor) Compress(dst, src []byte) []byte {
+	return n.encoder.EncodeAll(src, dst)
+}
+
+// Decompress runs src through the compressor's own decoder, passing dst as the destination buffer.
+func (n *NativeCompressor) Decompress(dst, src []byte) ([]byte, error) {
+	return n.decoder.DecodeAll(src, dst)
+}
+
+// CompressDict compresses src with the encoder held by dict; it panics on a nil, empty, or non-native dict.
+func (n *NativeCompressor) CompressDict(dst, src []byte, dict *CDict) []byte {
+	if dict == nil || dict.impl == nil {
+		panic("runtime error: nil dictionary passed to native compressor")
+	}
+	wrapped, ok := dict.impl.(*nativeDictEncoder)
+	if !ok {
+		panic("runtime error: invalid dictionary type for native compressor")
+	}
+	return wrapped.encoder.EncodeAll(src, dst)
+}
+
+// DecompressDict decompresses src with the decoder held by dict; it panics on a nil, empty, or non-native dict.
+func (n *NativeCompressor) DecompressDict(dst, src []byte, dict *DDict) ([]byte, error) {
+	if dict == nil || dict.impl == nil {
+		panic("runtime error: nil dictionary passed to native compressor")
+	}
+	wrapped, ok := dict.impl.(*nativeDictDecoder)
+	if !ok {
+		panic("runtime error: invalid dictionary type for native compressor")
+	}
+	return wrapped.decoder.DecodeAll(src, dst)
+}
+
+// NewCDict wraps a new encoder configured with dict in a CDict, or returns the encoder's construction error.
+func (n *NativeCompressor) NewCDict(dict []byte) (*CDict, error) {
+	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDict(dict))
+	if err != nil {
+		return nil, err
+	}
+	impl := &nativeDictEncoder{encoder: enc}
+	return &CDict{impl: impl}, nil
+}
+
+// NewDDict wraps a new decoder configured with dict in a DDict, or returns the decoder's construction error.
+func (n *NativeCompressor) NewDDict(dict []byte) (*DDict, error) {
+	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dict))
+	if err != nil {
+		return nil, err
+	}
+	impl := &nativeDictDecoder{decoder: dec}
+	return &DDict{impl: impl}, nil
+}
+
+// BuildDict is not available in the native implementation and always panics.
+func (n *NativeCompressor) BuildDict(samples [][]byte, dictSize int) []byte {
+	// The BuildDict offered by klauspost/compress/zstd is effectively unusable: it fails even when
+	// given thousands of samples with obvious repeated patterns. Build logic should keep callers
+	// from ever reaching this method.
+	panic("runtime error: BuildDict not supported in native implementation - use CGO implementation")
+}
diff --git a/go/store/nbs/zstd/native_impl_test.go b/go/store/nbs/zstd/native_impl_test.go
new file mode 100644
index 0000000000..d768bb52fb
--- /dev/null
+++ b/go/store/nbs/zstd/native_impl_test.go
@@ -0,0 +1,72 @@
+// Copyright 2024 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package zstd
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestNativeCompressionRoundtrip(t *testing.T) {
+	c, err := NewNativeCompressor()
+	require.NoError(t, err)
+
+	want := []byte("Hello, world! This is a test of native zstd compression.")
+
+	// Compress with a nil destination, expand the result again, and compare with the input.
+	packed := c.Compress(nil, want)
+	got, err := c.Decompress(nil, packed)
+	require.NoError(t, err)
+	require.Equal(t, string(want), string(got))
+}
+
+func TestNativeDictionaryCompressionRoundtrip(t *testing.T) {
+	native, err := NewNativeCompressor()
+	require.NoError(t, err)
+
+	// Native BuildDict is unusable, so the CGO implementation builds the dictionary
+	// from a few short samples.
+	trainer := NewGozstdCompressor()
+	samples := [][]byte{
+		[]byte("This is sample text with common words and phrases."),
+		[]byte("Common words and phrases appear frequently."),
+		[]byte("Sample text contains common patterns."),
+	}
+	dictBytes := trainer.BuildDict(samples, 256)
+	if len(dictBytes) == 0 {
+		t.Skip("Could not build dictionary for testing")
+	}
+
+	// Hand the CGO-built dictionary to the native implementation, once for
+	// compression and once for decompression.
+	cDict, err := native.NewCDict(dictBytes)
+	require.NoError(t, err)
+
+	dDict, err := native.NewDDict(dictBytes)
+	require.NoError(t, err)
+
+	payload := []byte("This is test data with common words that match the dictionary.")
+
+	// Compress with the dictionary using the native implementation.
+	packed := native.CompressDict(nil, payload, cDict)
+
+	// Decompress with the dictionary using the native implementation.
+	unpacked, err := native.DecompressDict(nil, packed, dDict)
+	require.NoError(t, err)
+
+	// The roundtrip must reproduce the payload exactly.
+	require.Equal(t, string(payload), string(unpacked))
+}