mirror of
https://github.com/dolthub/dolt.git
synced 2026-02-04 18:49:00 -06:00
Add the native zstd dependency and impl
This commit is contained in:
@@ -72,6 +72,7 @@ require (
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.2
|
||||
github.com/jmoiron/sqlx v1.3.4
|
||||
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
|
||||
github.com/klauspost/compress v1.18.0
|
||||
github.com/kylelemons/godebug v1.1.0
|
||||
github.com/lib/pq v1.10.0
|
||||
github.com/mohae/uvarint v0.0.0-20160208145430-c3f9e62bf2b0
|
||||
@@ -156,7 +157,6 @@ require (
|
||||
github.com/googleapis/gax-go/v2 v2.14.2 // indirect
|
||||
github.com/gorilla/mux v1.8.0 // indirect
|
||||
github.com/hashicorp/golang-lru v0.5.4 // indirect
|
||||
github.com/klauspost/compress v1.18.0 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.0.12 // indirect
|
||||
github.com/lestrrat-go/strftime v1.0.4 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
|
||||
111
go/store/nbs/zstd/native_impl.go
Normal file
111
go/store/nbs/zstd/native_impl.go
Normal file
@@ -0,0 +1,111 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// nativeDictEncoder wraps a zstd encoder configured with a dictionary
|
||||
type nativeDictEncoder struct {
|
||||
encoder *zstd.Encoder
|
||||
}
|
||||
|
||||
// nativeDictDecoder wraps a zstd decoder configured with a dictionary
|
||||
type nativeDictDecoder struct {
|
||||
decoder *zstd.Decoder
|
||||
}
|
||||
|
||||
// NativeCompressor implements the Compressor interface using github.com/klauspost/compress/zstd
|
||||
type NativeCompressor struct {
|
||||
encoder *zstd.Encoder
|
||||
decoder *zstd.Decoder
|
||||
}
|
||||
|
||||
// NewNativeCompressor creates a new NativeCompressor
|
||||
func NewNativeCompressor() (*NativeCompressor, error) {
|
||||
encoder, err := zstd.NewWriter(nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
decoder, err := zstd.NewReader(nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &NativeCompressor{
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Compress compresses data using klauspost/compress/zstd
|
||||
func (n *NativeCompressor) Compress(dst, src []byte) []byte {
|
||||
return n.encoder.EncodeAll(src, dst)
|
||||
}
|
||||
|
||||
// Decompress decompresses data using klauspost/compress/zstd
|
||||
func (n *NativeCompressor) Decompress(dst, src []byte) ([]byte, error) {
|
||||
return n.decoder.DecodeAll(src, dst)
|
||||
}
|
||||
|
||||
// CompressDict compresses data using a compression dictionary
|
||||
func (n *NativeCompressor) CompressDict(dst, src []byte, dict *CDict) []byte {
|
||||
if dict == nil || dict.impl == nil {
|
||||
panic("runtime error: nil dictionary passed to native compressor")
|
||||
}
|
||||
if dictEncoder, ok := dict.impl.(*nativeDictEncoder); ok {
|
||||
return dictEncoder.encoder.EncodeAll(src, dst)
|
||||
}
|
||||
panic("runtime error: invalid dictionary type for native compressor")
|
||||
}
|
||||
|
||||
// DecompressDict decompresses data using a decompression dictionary
|
||||
func (n *NativeCompressor) DecompressDict(dst, src []byte, dict *DDict) ([]byte, error) {
|
||||
if dict == nil || dict.impl == nil {
|
||||
panic("runtime error: nil dictionary passed to native compressor")
|
||||
}
|
||||
if dictDecoder, ok := dict.impl.(*nativeDictDecoder); ok {
|
||||
return dictDecoder.decoder.DecodeAll(src, dst)
|
||||
}
|
||||
panic("runtime error: invalid dictionary type for native compressor")
|
||||
}
|
||||
|
||||
// NewCDict creates a new compression dictionary
|
||||
func (n *NativeCompressor) NewCDict(dict []byte) (*CDict, error) {
|
||||
encoder, err := zstd.NewWriter(nil, zstd.WithEncoderDict(dict))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &CDict{impl: &nativeDictEncoder{encoder: encoder}}, nil
|
||||
}
|
||||
|
||||
// NewDDict creates a new decompression dictionary
|
||||
func (n *NativeCompressor) NewDDict(dict []byte) (*DDict, error) {
|
||||
decoder, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dict))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &DDict{impl: &nativeDictDecoder{decoder: decoder}}, nil
|
||||
}
|
||||
|
||||
// BuildDict builds a dictionary from training samples
|
||||
func (n *NativeCompressor) BuildDict(samples [][]byte, dictSize int) []byte {
|
||||
// klauspost/compress/zstd BuildDict is effectively unusable - it fails even with
|
||||
// thousands of samples with obvious repeated patterns. Build logic should prevent us from reaching
|
||||
// this point.
|
||||
panic("runtime error: BuildDict not supported in native implementation - use CGO implementation")
|
||||
}
|
||||
72
go/store/nbs/zstd/native_impl_test.go
Normal file
72
go/store/nbs/zstd/native_impl_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNativeCompressionRoundtrip(t *testing.T) {
|
||||
compressor, err := NewNativeCompressor()
|
||||
require.NoError(t, err)
|
||||
|
||||
input := []byte("Hello, world! This is a test of native zstd compression.")
|
||||
|
||||
compressed := compressor.Compress(nil, input)
|
||||
|
||||
decompressed, err := compressor.Decompress(nil, compressed)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, string(input), string(decompressed))
|
||||
}
|
||||
|
||||
func TestNativeDictionaryCompressionRoundtrip(t *testing.T) {
|
||||
nativeCompressor, err := NewNativeCompressor()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Use CGO implementation to build a working dictionary since native BuildDict is unusable
|
||||
cgoCompressor := NewGozstdCompressor()
|
||||
|
||||
samples := [][]byte{
|
||||
[]byte("This is sample text with common words and phrases."),
|
||||
[]byte("Common words and phrases appear frequently."),
|
||||
[]byte("Sample text contains common patterns."),
|
||||
}
|
||||
|
||||
dictData := cgoCompressor.BuildDict(samples, 256)
|
||||
if len(dictData) == 0 {
|
||||
t.Skip("Could not build dictionary for testing")
|
||||
}
|
||||
|
||||
testData := []byte("This is test data with common words that match the dictionary.")
|
||||
|
||||
// Test that native implementation can use CGO-built dictionary
|
||||
cDict, err := nativeCompressor.NewCDict(dictData)
|
||||
require.NoError(t, err)
|
||||
|
||||
dDict, err := nativeCompressor.NewDDict(dictData)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Compress with dictionary using native implementation
|
||||
compressed := nativeCompressor.CompressDict(nil, testData, cDict)
|
||||
|
||||
// Decompress with dictionary using native implementation
|
||||
decompressed, err := nativeCompressor.DecompressDict(nil, compressed, dDict)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify roundtrip
|
||||
require.Equal(t, string(testData), string(decompressed))
|
||||
}
|
||||
Reference in New Issue
Block a user