Merge remote-tracking branch 'origin/main' into nicktobey/sqldiff

This commit is contained in:
Nick Tobey
2026-05-08 12:14:59 -07:00
28 changed files with 133 additions and 156 deletions
@@ -0,0 +1,31 @@
# Composite action shared by the CI workflows: installs Go (version taken from
# go/go.mod) and the platform-specific ICU4C pieces used when building dolt.
name: 'Set up Go toolchain'
description: >-
  Set up Go and platform-specific dependencies (ICU4C) needed to build dolt.
  On macOS, sets CGO flags pointing at brew icu4c. On Windows, installs MSYS2
  with icu/toolchain/pkg-config and exposes the install path via the
  msys2-location output. On Linux, just sets up Go.
outputs:
  msys2-location:
    description: 'Path to the MSYS2 installation (Windows only).'
    # Forwarded from the step with id `msys2`; that step is skipped on
    # non-Windows runners.
    value: ${{ steps.msys2.outputs.msys2-location }}
runs:
  using: 'composite'
  steps:
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version-file: go/go.mod
    - name: Install ICU4C (MacOS)
      if: runner.os == 'macOS'
      shell: bash
      # Resolve the versioned icu4c directory inside the brew cellar and export
      # the cgo include/lib flags to later steps through GITHUB_ENV.
      # Expansions are quoted so a path containing whitespace or glob
      # characters is written verbatim instead of being word-split.
      run: |
        dir=$(brew --cellar icu4c)
        dir="$dir"/$(ls "$dir")
        echo "CGO_CPPFLAGS=-I$dir/include" >> "$GITHUB_ENV"
        echo "CGO_LDFLAGS=-L$dir/lib" >> "$GITHUB_ENV"
    - name: Install ICU4C (Windows)
      # This id is referenced by the `msys2-location` output above.
      id: msys2
      if: runner.os == 'Windows'
      uses: msys2/setup-msys2@v2
      with:
        path-type: inherit
        msystem: UCRT64
        pacboy: icu:p toolchain:p pkg-config:p
+2 -9
View File
@@ -40,11 +40,8 @@ jobs:
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 10800 # 3 hours D:
- uses: actions/checkout@v6
- name: Setup Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Setup Python 3.x
uses: actions/setup-python@v5
with:
@@ -75,10 +72,6 @@ jobs:
- name: Install Dolt
working-directory: ./go
run: |
icu4c_dir=$(brew --cellar icu4c)
icu4c_dir="$dir"/$(ls "$icu4c_dir")
export CGO_CPPFLAGS=-I$icu4c_dir/include
export CGO_LDFLAGS=-L$icu4c_dir/lib
go build -mod=readonly -o ../.ci_bin/dolt ./cmd/dolt/.
go build -mod=readonly -o ../.ci_bin/remotesrv ./utils/remotesrv/.
go build -mod=readonly -o ../.ci_bin/noms ./store/cmd/noms/.
+2 -5
View File
@@ -55,11 +55,8 @@ jobs:
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 10800 # 3 hours D:
- uses: actions/checkout@v6
- name: Setup Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Setup Python 3.x
uses: actions/setup-python@v5
with:
+2 -5
View File
@@ -26,12 +26,9 @@ jobs:
steps:
- uses: actions/checkout@v6
if: ${{ env.use_credentials != 'true' }}
- name: Setup Go 1.x
- name: Set up Go toolchain
if: ${{ env.use_credentials != 'true' }}
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
uses: ./.github/actions/setup-go-toolchain
- name: Setup Python 3.x
if: ${{ env.use_credentials != 'true' }}
uses: actions/setup-python@v5
+2 -5
View File
@@ -59,11 +59,8 @@ jobs:
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 10800 # 3 hours D:
- uses: actions/checkout@v6
- name: Setup Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Setup Python 3.x
uses: actions/setup-python@v5
with:
+2 -11
View File
@@ -96,11 +96,8 @@ jobs:
if: ${{ github.event_name == 'repository_dispatch' }}
with:
ref: ${{ github.event.client_payload.ref }}
- name: Setup Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Setup Python 3.x
uses: actions/setup-python@v5
with:
@@ -132,12 +129,6 @@ jobs:
pip install mysql-connector-python
pip install pandas
pip install pyarrow
- name: Install ICU4C (Windows)
uses: msys2/setup-msys2@v2
with:
path-type: inherit
msystem: UCRT64
pacboy: icu:p toolchain:p pkg-config:p
- name: Install Dolt
working-directory: ./go
shell: msys2 {0}
+4 -10
View File
@@ -25,11 +25,8 @@ jobs:
adaptive_encoding: ["false", "true"]
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test Binlog
working-directory: ./go
run: |
@@ -53,11 +50,8 @@ jobs:
adaptive_encoding: ["false"]
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test Binlog with Race
working-directory: ./go
run: |
@@ -22,11 +22,8 @@ jobs:
adaptive_encoding: [ "false", "true" ]
steps:
- uses: actions/checkout@v6
- name: Setup Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- uses: actions/setup-node@v4
with:
node-version: ^16
+2 -5
View File
@@ -22,11 +22,8 @@ jobs:
adaptive_encoding: [ "false", "true" ]
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test engine
working-directory: ./go
run: |
+4 -10
View File
@@ -23,11 +23,8 @@ jobs:
DOLT_USE_ADAPTIVE_ENCODING: "true"
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test All
working-directory: ./go
run: |
@@ -67,11 +64,8 @@ jobs:
DOLT_USE_ADAPTIVE_ENCODING: "true"
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test All
working-directory: ./go
run: |
+8 -42
View File
@@ -25,26 +25,9 @@ jobs:
os: [macos-latest, ubuntu-22.04, windows-latest]
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Install ICU4C (MacOS)
if: ${{ matrix.os == 'macos-latest' }}
run: |
dir=$(brew --cellar icu4c)
dir="$dir"/$(ls "$dir")
echo CGO_CPPFLAGS=-I$dir/include >> $GITHUB_ENV
echo CGO_LDFLAGS=-L$dir/lib >> $GITHUB_ENV
- name: Install ICU4C (Windows)
id: msys2
if: ${{ matrix.os == 'windows-latest' }}
uses: msys2/setup-msys2@v2
with:
path-type: inherit
msystem: UCRT64
pacboy: icu:p toolchain:p pkg-config:p
- name: Set up Go toolchain
id: toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test All
working-directory: ./go
run: |
@@ -84,7 +67,7 @@ jobs:
done
env:
MATRIX_OS: ${{ matrix.os }}
MSYS2_LOCATION: ${{ steps.msys2.outputs.msys2-location }}
MSYS2_LOCATION: ${{ steps.toolchain.outputs.msys2-location }}
noracetest:
name: Go tests - no race
defaults:
@@ -97,26 +80,9 @@ jobs:
os: [macos-latest, ubuntu-22.04, windows-latest]
steps:
- uses: actions/checkout@v6
- name: Set up Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Install ICU4C (MacOS)
if: ${{ matrix.os == 'macos-latest' }}
run: |
dir=$(brew --cellar icu4c)
dir="$dir"/$(ls "$dir")
echo CGO_CPPFLAGS=-I$dir/include >> $GITHUB_ENV
echo CGO_LDFLAGS=-L$dir/lib >> $GITHUB_ENV
- name: Install ICU4C (Windows)
if: ${{ matrix.os == 'windows-latest' }}
id: msys2
uses: msys2/setup-msys2@v2
with:
path-type: inherit
msystem: UCRT64
pacboy: icu:p toolchain:p pkg-config:p
- name: Set up Go toolchain
id: toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Test All
working-directory: ./go
run: |
@@ -127,4 +93,4 @@ jobs:
env:
MATRIX_OS: ${{ matrix.os }}
DOLT_TEST_RUN_NON_RACE_TESTS: "true"
MSYS2_LOCATION: ${{ steps.msys2.outputs.msys2-location }}
MSYS2_LOCATION: ${{ steps.toolchain.outputs.msys2-location }}
@@ -25,11 +25,8 @@ jobs:
adaptive_encoding: ["false", "true"]
steps:
- uses: actions/checkout@v6
- name: Setup Go 1.x
uses: actions/setup-go@v5
with:
go-version-file: go/go.mod
id: go
- name: Set up Go toolchain
uses: ./.github/actions/setup-go-toolchain
- name: Create CI Bin
run: |
mkdir -p ./.ci_bin
+1 -1
View File
@@ -15,5 +15,5 @@
package doltversion
const (
Version = "1.87.0"
Version = "2.0.0"
)
+16 -1
View File
@@ -453,7 +453,19 @@ func (rcv *Column) MutateHiddenSystem(n bool) bool {
return rcv._tab.MutateBoolSlot(34, n)
}
const ColumnNumFields = 16
// AdaptiveEncodingBreakingChange returns the Column's
// adaptive_encoding_breaking_change flag, read from vtable offset 36.
// When the offset lookup yields 0 (no value stored for the field), it
// returns the default, false.
func (rcv *Column) AdaptiveEncodingBreakingChange() bool {
o := flatbuffers.UOffsetT(rcv._tab.Offset(36))
if o != 0 {
return rcv._tab.GetBool(o + rcv._tab.Pos)
}
return false
}
// MutateAdaptiveEncodingBreakingChange overwrites the
// adaptive_encoding_breaking_change flag (vtable offset 36) with n and
// returns the result of MutateBoolSlot.
// NOTE(review): presumably false means the field is not present in the
// buffer and nothing was written — confirm against the flatbuffers Table API.
func (rcv *Column) MutateAdaptiveEncodingBreakingChange(n bool) bool {
return rcv._tab.MutateBoolSlot(36, n)
}
const ColumnNumFields = 17
func ColumnStart(builder *flatbuffers.Builder) {
builder.StartObject(ColumnNumFields)
@@ -506,6 +518,9 @@ func ColumnAddUsesAdaptiveEncoding(builder *flatbuffers.Builder, usesAdaptiveEnc
func ColumnAddHiddenSystem(builder *flatbuffers.Builder, hiddenSystem bool) {
builder.PrependBoolSlot(15, hiddenSystem, false)
}
// ColumnAddAdaptiveEncodingBreakingChange sets the
// adaptive_encoding_breaking_change flag (field slot 16, default false) on
// the Column object currently being built with builder.
func ColumnAddAdaptiveEncodingBreakingChange(builder *flatbuffers.Builder, adaptiveEncodingBreakingChange bool) {
builder.PrependBoolSlot(16, adaptiveEncodingBreakingChange, false)
}
func ColumnEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT {
return builder.EndObject()
}
+1 -1
View File
@@ -64,7 +64,7 @@ require (
github.com/dolthub/dolt-mcp v0.3.4
github.com/dolthub/eventsapi_schema v0.0.0-20260310172945-37a9265ade69
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.20.1-0.20260505171600-5a9dda3f04ff
github.com/dolthub/go-mysql-server v0.20.1-0.20260507202550-43d6daf5958b
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
github.com/edsrzf/mmap-go v1.2.0
github.com/esote/minmaxheap v1.0.0
+2 -2
View File
@@ -212,8 +212,8 @@ github.com/dolthub/fslock v0.0.0-20251215194149-ef20baba2318 h1:n+vdH5G5Db+1qnDC
github.com/dolthub/fslock v0.0.0-20251215194149-ef20baba2318/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20260412212219-49724d547866 h1:U6gSf5I0e6h6GP1/5Sa7D2lWW1CWfcVPtY5wkyHq6jY=
github.com/dolthub/go-icu-regex v0.0.0-20260412212219-49724d547866/go.mod h1:F3cnm+vMRK1HaU6+rNqQrOCyR03HHhR1GWG2gnPOqaE=
github.com/dolthub/go-mysql-server v0.20.1-0.20260505171600-5a9dda3f04ff h1:q3GZb7jKVgbn0hU4f458E4vTX4NMzEVSkddC3M3dI80=
github.com/dolthub/go-mysql-server v0.20.1-0.20260505171600-5a9dda3f04ff/go.mod h1:55n1yslSIZ5uewFbtd82DsYt3f9vUKwnRN5GZJie+nE=
github.com/dolthub/go-mysql-server v0.20.1-0.20260507202550-43d6daf5958b h1:Ew5nacrGUHRVyae1+/0vUjt3ZbX/6vaDEZfeDNMCSj8=
github.com/dolthub/go-mysql-server v0.20.1-0.20260507202550-43d6daf5958b/go.mod h1:55n1yslSIZ5uewFbtd82DsYt3f9vUKwnRN5GZJie+nE=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20260414231531-5f031e3e9037 h1:oIW9HwuWrhxv+4HZxA+QQSKHLqWFyXZ2FmNjUYwkdiM=
@@ -2246,7 +2246,7 @@ func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAd
return nil, true, err
}
return mergeJSON(ctx, m.ns, baseDoc, leftDoc, rightDoc)
return MergeJSON(ctx, m.ns, baseDoc, leftDoc, rightDoc)
}
func (m *valueMerger) mergeJSONAdaptive(ctx context.Context, baseField []byte, leftField []byte, rightField []byte) (result interface{}, conflict bool, err error) {
@@ -2279,10 +2279,15 @@ func (m *valueMerger) mergeJSONAdaptive(ctx context.Context, baseField []byte, l
return nil, false, err
}
return mergeJSON(ctx, m.ns, baseJson, leftJson, rightJson)
return MergeJSON(ctx, m.ns, baseJson, leftJson, rightJson)
}
func mergeJSON(ctx context.Context, ns tree.NodeStore, baseJson, leftJson, rightJson sql.JSONWrapper) (resultDoc sql.JSONWrapper, conflict bool, err error) {
// MergeJSON performs a three-way merge of JSON documents. Non-overlapping
// field changes are merged automatically; overlapping changes produce a
// conflict (conflict=true). All three inputs must be JSON objects for
// field-level merging; if any input is not an object, the function falls
// back to equality comparison.
func MergeJSON(ctx context.Context, ns tree.NodeStore, baseJson, leftJson, rightJson sql.JSONWrapper) (resultDoc sql.JSONWrapper, conflict bool, err error) {
// First, deserialize each value into JSON.
// We can only merge if the value at all three commits is a JSON object.
baseIsObject, err := tree.IsJsonObject(ctx, baseJson)
@@ -121,7 +121,7 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e
// If the key existed at base, we can do a recursive three-way merge to resolve
// changes to the values.
// This shouldn't be necessary: if it's an object on all three branches, the original diff is recursive.
mergedValue, conflict, err := mergeJSON(ctx, differ.ns, differ.leftCurrentDiff.From,
mergedValue, conflict, err := MergeJSON(ctx, differ.ns, differ.leftCurrentDiff.From,
differ.leftCurrentDiff.To,
differ.rightCurrentDiff.To)
if err != nil {
@@ -281,6 +281,7 @@ func serializeSchemaColumns(b *fb.Builder, sch schema.Schema) fb.UOffsetT {
// adaptive encoded field as a literal value.
if usesAdaptiveEncoding(col) {
serial.ColumnAddUsesAdaptiveEncoding(b, true)
serial.ColumnAddAdaptiveEncodingBreakingChange(b, true)
}
if col.Hidden {
@@ -29,11 +29,11 @@ import (
// UseAdaptiveEncoding indicates whether to use adaptive encoding for large/unbounded fields instead of address
// encoding. It defaults to true; tests can set this variable to false, or set DOLT_USE_ADAPTIVE_ENCODING=false, in
// order to fall back to address encoding for TEXT and BLOB columns. Extended types will always use adaptive encoding
// for TEXT and BLOB types regardless of this value.
var UseAdaptiveEncoding = false
var UseAdaptiveEncoding = true
func init() {
if envVal, ok := os.LookupEnv("DOLT_USE_ADAPTIVE_ENCODING"); ok && envVal == "true" {
UseAdaptiveEncoding = true
if envVal, ok := os.LookupEnv("DOLT_USE_ADAPTIVE_ENCODING"); ok && envVal == "false" {
UseAdaptiveEncoding = false
}
}
@@ -800,7 +800,7 @@ func (di *doltIndex) HasContentHashedField() bool {
}
func (di *doltIndex) Order(ctx *sql.Context) sql.IndexOrder {
if di.HasContentHashedField() {
if di.HasContentHashedField() || di.IsSpatial() {
return sql.IndexOrderNone
}
+5
View File
@@ -79,6 +79,11 @@ table Column {
// hidden system columns are not directly accessible or visible by users, and are used to
// store functional expression results for use in secondary indexes
hidden_system:bool;
// A range of 1.x releases from approximately 1.86.0 onwards had a nasty bug with adaptive encoded values where
// data loss could occur during garbage collection or pull operations. This field forces clients from that release
// range to upgrade before reading schemas with adaptive encoded columns.
adaptive_encoding_breaking_change:bool;
}
table Index {
+3
View File
@@ -282,6 +282,9 @@ func (sm SerialMessage) HumanReadableStringAtIndentationLevel(level int) string
printWithIndendationLevel(level, ret, "Generated: %t\n", col.Generated())
printWithIndendationLevel(level, ret, "Virtual: %t\n", col.Virtual())
printWithIndendationLevel(level, ret, "OnUpdateValue: %s\n", col.OnUpdateValue())
printWithIndendationLevel(level, ret, "UsesAdaptiveEncoding: %t\n", col.UsesAdaptiveEncoding())
printWithIndendationLevel(level, ret, "AdaptiveEncodingBreakingChange: %t\n", col.AdaptiveEncodingBreakingChange())
level -= 1
printWithIndendationLevel(level, ret, "}\n")
+10
View File
@@ -756,6 +756,16 @@ func (td *TupleDesc) formatValue(ctx context.Context, enc Encoding, i int, value
return strconv.FormatUint(v, 10)
case StringEnc:
return readString(value)
case StringAdaptiveEnc, BytesAdaptiveEnc:
if b, isInline := InlineValueBytes(value); isInline {
return string(b)
}
// for out of band values, we don't want to load the value just to format it, so we return a hex string of the bytes
// TODO: this is used in user-facing error messages like duplicate key errors, but in this Format method it's not
// appropriate to make assumptions about what format is correct for specific use cases involving large values.
// We should find places that use adaptive encoded values for user-facing messages and decide what format is
// best there.
return hex.EncodeToString(value)
case ByteStringEnc:
return hex.EncodeToString(value)
case Hash128Enc:
+1 -30
View File
@@ -255,21 +255,6 @@ assert_has_key_value() {
[[ "$output" =~ "branch not found: branch1" ]] || false
}
@test "show: primary index leaf" {
if [ "$DOLT_USE_ADAPTIVE_ENCODING" = "true" ]; then
skip "adaptive encoding stores small values inline; see 'primary index leaf - adaptive encoding'"
fi
dolt sql <<EOF
create table test(pk int primary key, t text, j json);
insert into test values (0, "Hello", "{}"), (1, "World", "[]");
EOF
run dolt show "#9heeqrj6idph7snnko484sqnobu2r46i"
[ $status -eq 0 ]
[[ "$output" =~ "SerialMessage" ]] || false
[[ "$output" =~ "{ key: 00000000 value: #0isi5776c0lu0d7rvsnfl80gsdisilsa, #e6sucun84ck3bgc1p9lorkibp30mvd2f }" ]] || false
[[ "$output" =~ "{ key: 01000000 value: #8scr7d6rtnafqovoa7d06em7jkpil9gg, #8arugs9qup4pvpmqbf64lpkm9f6cdv74 }" ]] || false
}
@test "show: primary index leaf - adaptive encoding" {
if [ "$DOLT_USE_ADAPTIVE_ENCODING" != "true" ]; then
skip "requires adaptive encoding; see 'primary index leaf'"
@@ -285,20 +270,6 @@ EOF
[[ "$output" =~ "{ key: 01000000 value: f90e80712c1adb322a6ed69f616964bd76ba20b6ec7a8d, f90a9c74faa5553fc209573262e1ea61748437af04a163 }" ]] || false
}
@test "show: blob leaf" {
if [ "$DOLT_USE_ADAPTIVE_ENCODING" = "true" ]; then
skip "adaptive encoding stores small values inline; see 'blob leaf - adaptive encoding'"
fi
dolt sql <<EOF
create table test(pk int primary key, t text, j json);
insert into test values (0, "Hello", "{}"), (1, "World", "[]");
EOF
run dolt show "#0isi5776c0lu0d7rvsnfl80gsdisilsa"
[ $status -eq 0 ]
[[ "$output" =~ "SerialMessage" ]] || false
[[ "$output" =~ "Blob - Hello" ]] || false
}
@test "show: blob leaf - adaptive encoding" {
if [ "$DOLT_USE_ADAPTIVE_ENCODING" != "true" ]; then
skip "requires adaptive encoding; see 'blob leaf'"
@@ -378,4 +349,4 @@ EOF
[[ "$output" =~ "SerialMessage" ]] || false
[[ "$output" =~ "{ key: 73000000, e6000000 ref: #pdcuscnfqsusgil1642k5hup1cp5co6t }" ]] || false
[[ "$output" =~ "{ key: f4090000, e8130000 ref: #hddhk8djkj275q1so9fs3ag48v7qsfsi }" ]] || false
}
}
+15
View File
@@ -77,6 +77,11 @@ function test_backward_compatibility() {
function test_bidirectional_compatibility() {
ver=$1
if [ -z $ver ]; then
return
fi
bin=`download_release "$ver"`
DOLT_NEW=`which dolt`
@@ -94,6 +99,11 @@ function test_bidirectional_compatibility() {
function test_bidirectional_remote_compatibility() {
ver=$1
if [ -z $ver ]; then
return
fi
bin=`download_release "$ver"`
DOLT_NEW=`which dolt`
@@ -136,6 +146,11 @@ function list_2_0_forward_compatible_versions() {
function test_forward_compatibility() {
ver=$1
if [ -z $ver ]; then
return
fi
bin=`download_release "$ver"`
DOLT_NEW_BIN=`which dolt` # capture current dolt before PATH is prepended with old binary
@@ -1 +1,2 @@
v1.87.0
v1.84.0