From f28fbd0760679da293e1b0d8bfb109bf08714294 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 16 Sep 2025 14:20:21 -0700 Subject: [PATCH 1/3] Put correct encoding when putting ExtendedAdaptiveEnc values. --- go/store/prolly/tree/prolly_fields.go | 27 +++++++-- go/store/val/tuple_builder.go | 80 +++++++++++++-------------- 2 files changed, 61 insertions(+), 46 deletions(-) diff --git a/go/store/prolly/tree/prolly_fields.go b/go/store/prolly/tree/prolly_fields.go index 3be0029285..06c8597ec2 100644 --- a/go/store/prolly/tree/prolly_fields.go +++ b/go/store/prolly/tree/prolly_fields.go @@ -360,11 +360,30 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v } } case val.ExtendedAdaptiveEnc: - b, err := tb.Desc.Handlers[i].SerializeValue(ctx, v) - if err != nil { - return err + switch value := v.(type) { + case string: + valueBytes, err := tb.Desc.Handlers[i].SerializeValue(ctx, v) + if err != nil { + return err + } + err = tb.PutAdaptiveValue(ctx, ns, i, valueBytes) + if err != nil { + return err + } + case *val.ExtendedValueWrapper: + if value.IsExactLength() { + tb.PutAdaptiveExtendedFromOutline(i, value) + } else { + valueBytes, err := value.GetBytes(ctx) + if err != nil { + return err + } + err = tb.PutAdaptiveBytesFromInline(ctx, i, valueBytes) + if err != nil { + return err + } + } } - tb.PutRaw(i, b) default: panic(fmt.Sprintf("unknown encoding %v %v", enc, v)) } diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 70d84fd3cf..7726935977 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -542,6 +542,21 @@ func (tb *TupleBuilder) PutCell(i int, v Cell) { func (tb *TupleBuilder) PutAdaptiveBytesFromInline(ctx context.Context, i int, v []byte) error { tb.Desc.expectEncoding(i, BytesAdaptiveEnc) + return tb.PutAdaptiveFromInline(ctx, i, v) +} + +func (tb *TupleBuilder) PutAdaptiveStringFromInline(ctx context.Context, i int, s string) error { + tb.Desc.expectEncoding(i, 
StringAdaptiveEnc) + return tb.PutAdaptiveFromInline(ctx, i, []byte(s)) +} + +func (tb *TupleBuilder) PutAdaptiveExtendedFromInline(ctx context.Context, i int, v []byte) error { + tb.Desc.expectEncoding(i, ExtendedAdaptiveEnc) + return tb.PutAdaptiveFromInline(ctx, i, v) +} + +func (tb *TupleBuilder) PutAdaptiveFromInline(ctx context.Context, i int, v []byte) error { + inlineSize := ByteSize(len(v)) + 1 // include extra header byte if int64(inlineSize) > tb.tupleLengthTarget { // Inline value is too large. We must store it out-of-band. @@ -574,67 +589,48 @@ func (tb *TupleBuilder) PutAdaptiveBytesFromInline(ctx context.Context, i int, v return nil } -func (tb *TupleBuilder) PutAdaptiveStringFromInline(ctx context.Context, i int, v string) error { - tb.Desc.expectEncoding(i, StringAdaptiveEnc) - inlineSize := ByteSize(len(v)) + 1 // include extra header byte - if int64(inlineSize) > tb.tupleLengthTarget { - // Inline value is too large. We must store it out of line. - maxLengthBytes := 9 - tb.ensureCapacity(ByteSize(hash.ByteLen + maxLengthBytes)) - blobLength := uint64(len(v)) - lengthSize, _ := makeVarInt(blobLength, tb.buf[tb.pos:]) - outOfBandSize := int64(lengthSize) + hash.ByteLen - - blobHash, err := tb.vs.WriteBytes(ctx, []byte(v)) +func (tb *TupleBuilder) PutAdaptiveValue(ctx context.Context, vs ValueStore, i int, v AdaptiveValue) error { + if v.getMessageLength() > tb.tupleLengthTarget { + // The message won't fit inlined, so preemptively store it out-of-band + byteArray, err := v.convertToByteArray(ctx, vs, nil) if err != nil { return err } - copy(tb.buf[tb.pos+int64(lengthSize):], blobHash[:]) - field := tb.buf[tb.pos : tb.pos+outOfBandSize] - tb.fields[i] = field - tb.pos += outOfBandSize - tb.inlineSize += int64(inlineSize) - tb.outOfBandSize += outOfBandSize + tb.PutAdaptiveFromOutline(i, byteArray.maxByteLength, byteArray.Addr) return nil + } else { + bytes, err := v.getUnderlyingBytes(ctx, vs) + if err != nil { + return err + } + return 
tb.PutAdaptiveFromInline(ctx, i, bytes) } - tb.ensureCapacity(inlineSize) - field := AdaptiveValue(tb.buf[tb.pos : tb.pos+int64(inlineSize)]) - tb.fields[i] = field - field[0] = 0 // Mark this as inline - copy(field[1:], v) - tb.pos += int64(inlineSize) - tb.inlineSize += int64(inlineSize) - tb.outOfBandSize += field.outOfBandSize() - return nil +} + +func (tb *TupleBuilder) PutAdaptiveExtendedFromOutline(i int, v *ExtendedValueWrapper) { + tb.Desc.expectEncoding(i, ExtendedAdaptiveEnc) + tb.PutAdaptiveFromOutline(i, v.outOfBandLength, v.Addr) } func (tb *TupleBuilder) PutAdaptiveBytesFromOutline(i int, v *ByteArray) { tb.Desc.expectEncoding(i, BytesAdaptiveEnc) - - maxLengthBytes := 9 - tb.ensureCapacity(ByteSize(hash.ByteLen + maxLengthBytes)) - blobLength := uint64(v.MaxByteLength()) - lengthSize, _ := makeVarInt(blobLength, tb.buf[tb.pos:]) - outOfBandSize := int64(lengthSize) + hash.ByteLen - - copy(tb.buf[tb.pos+int64(lengthSize):], v.Addr[:]) - field := tb.buf[tb.pos : tb.pos+outOfBandSize] - tb.fields[i] = field - tb.pos += outOfBandSize - tb.inlineSize += int64(blobLength) + 1 - tb.outOfBandSize += outOfBandSize + tb.PutAdaptiveFromOutline(i, v.maxByteLength, v.Addr) } func (tb *TupleBuilder) PutAdaptiveStringFromOutline(i int, v *TextStorage) { tb.Desc.expectEncoding(i, StringAdaptiveEnc) + tb.PutAdaptiveFromOutline(i, v.maxByteLength, v.Addr) +} + +func (tb *TupleBuilder) PutAdaptiveFromOutline(i int, maxByteLength int64, addr hash.Hash) { maxLengthBytes := 9 tb.ensureCapacity(ByteSize(hash.ByteLen + maxLengthBytes)) - blobLength := uint64(v.MaxByteLength()) + blobLength := uint64(maxByteLength) lengthSize, _ := makeVarInt(blobLength, tb.buf[tb.pos:]) outOfBandSize := int64(lengthSize) + hash.ByteLen - copy(tb.buf[tb.pos+int64(lengthSize):], v.Addr[:]) + copy(tb.buf[tb.pos+int64(lengthSize):], addr[:]) field := tb.buf[tb.pos : tb.pos+outOfBandSize] tb.fields[i] = field tb.pos += outOfBandSize From bb53c6893ace9682a4b0f75e9181f93231cb8092 Mon Sep 17 
00:00:00 2001 From: Nick Tobey Date: Wed, 17 Sep 2025 16:28:42 -0700 Subject: [PATCH 2/3] Re-enable Extended Adaptive Encoding --- go/libraries/doltcore/schema/serial_encoding.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/go/libraries/doltcore/schema/serial_encoding.go b/go/libraries/doltcore/schema/serial_encoding.go index bbc9eb5336..68fc126bf3 100644 --- a/go/libraries/doltcore/schema/serial_encoding.go +++ b/go/libraries/doltcore/schema/serial_encoding.go @@ -30,8 +30,7 @@ func EncodingFromSqlType(typ sql.Type) serial.Encoding { case sql.ExtendedTypeSerializedWidth_64K: return serial.EncodingExtended case sql.ExtendedTypeSerializedWidth_Unbounded: - // TODO: should use serial.EncodingExtendedAdaptive, but it's currently broken - return serial.EncodingExtendedAddr + return serial.EncodingExtendedAdaptive default: panic(fmt.Errorf("unknown serialization width")) } From d9553e67e43dce9d86c14f2779996723f4cc67dc Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 19 Sep 2025 15:23:28 -0700 Subject: [PATCH 3/3] Fix issue when writing to ExtendedAdaptiveEncoding columns where I assumed the written values were strings. 
--- go/store/prolly/tree/prolly_fields.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/go/store/prolly/tree/prolly_fields.go b/go/store/prolly/tree/prolly_fields.go index 06c8597ec2..c57e795192 100644 --- a/go/store/prolly/tree/prolly_fields.go +++ b/go/store/prolly/tree/prolly_fields.go @@ -361,15 +361,6 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v } case val.ExtendedAdaptiveEnc: switch value := v.(type) { - case string: - valueBytes, err := tb.Desc.Handlers[i].SerializeValue(ctx, v) - if err != nil { - return err - } - err = tb.PutAdaptiveValue(ctx, ns, i, valueBytes) - if err != nil { - return err - } case *val.ExtendedValueWrapper: if value.IsExactLength() { tb.PutAdaptiveExtendedFromOutline(i, value) @@ -383,6 +374,15 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v return err } } + default: + valueBytes, err := tb.Desc.Handlers[i].SerializeValue(ctx, v) + if err != nil { + return err + } + err = tb.PutAdaptiveValue(ctx, ns, i, valueBytes) + if err != nil { + return err + } } default: panic(fmt.Sprintf("unknown encoding %v %v", enc, v))