store GEOMETRY types as BLOBs (#6933)

This commit is contained in:
James Cor
2023-11-07 01:49:48 -08:00
committed by GitHub
parent 9d1562ef81
commit c5710b42ab
14 changed files with 230 additions and 77 deletions
+3
View File
@@ -47,6 +47,7 @@ const (
EncodingStringAddr Encoding = 23
EncodingJSONAddr Encoding = 24
EncodingCell Encoding = 25
EncodingGeomAddr Encoding = 26
EncodingString Encoding = 128
EncodingBytes Encoding = 129
EncodingDecimal Encoding = 130
@@ -79,6 +80,7 @@ var EnumNamesEncoding = map[Encoding]string{
EncodingStringAddr: "StringAddr",
EncodingJSONAddr: "JSONAddr",
EncodingCell: "Cell",
EncodingGeomAddr: "GeomAddr",
EncodingString: "String",
EncodingBytes: "Bytes",
EncodingDecimal: "Decimal",
@@ -111,6 +113,7 @@ var EnumValuesEncoding = map[string]Encoding{
"StringAddr": EncodingStringAddr,
"JSONAddr": EncodingJSONAddr,
"Cell": EncodingCell,
"GeomAddr": EncodingGeomAddr,
"String": EncodingString,
"Bytes": EncodingBytes,
"Decimal": EncodingDecimal,
+1 -1
View File
@@ -50,7 +50,7 @@ type FeatureVersion int64
// DoltFeatureVersion is described in feature_version.md.
// only variable for testing.
var DoltFeatureVersion FeatureVersion = 5 // last bumped when adding virtual columns to schema storage
var DoltFeatureVersion FeatureVersion = 6 // last bumped when changing geometry types to be stored as BLOBs
// RootValue is the value of the Database and is the committed value in every Dolt commit.
type RootValue struct {
@@ -76,7 +76,7 @@ func EncodingFromSqlType(typ query.Type) serial.Encoding {
case query.Type_VARCHAR:
return serial.EncodingString
case query.Type_GEOMETRY:
return serial.EncodingGeometry
return serial.EncodingGeomAddr
case query.Type_JSON:
return serial.EncodingJSONAddr
case query.Type_BLOB:
@@ -207,9 +207,13 @@ func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Ind
return err
}
} else if def.IsSpatial() {
geom, _, err := sqltypes.GeometryType{}.Convert(field[:len(field)-1])
geom, err := dereferenceGeometry(ctx, vd, j+1, value, secondary.NodeStore())
if err != nil {
panic(err)
return err
}
geom, _, err = sqltypes.GeometryType{}.Convert(geom)
if err != nil {
return err
}
cell := index.ZCell(geom.(sqltypes.GeometryValue))
field = cell[:]
@@ -298,9 +302,13 @@ func validatePkIndex(ctx context.Context, sch schema.Schema, def schema.Index, p
return err
}
} else if def.IsSpatial() {
geom, _, err := sqltypes.GeometryType{}.Convert(field[:len(field)-1])
geom, err := dereferenceGeometry(ctx, vd, j-pkSize, value, secondary.NodeStore())
if err != nil {
panic(err)
return err
}
geom, _, err = sqltypes.GeometryType{}.Convert(geom)
if err != nil {
return err
}
cell := index.ZCell(geom.(sqltypes.GeometryValue))
field = cell[:]
@@ -379,6 +387,31 @@ func dereferenceContent(ctx context.Context, tableValueDescriptor val.TupleDesc,
}
}
// dereferenceGeometry loads the geometry stored in one field of a main-table value tuple and
// hands it back in a form that can be passed on for geometry conversion.
//
// |tableValueDescriptor| is the tuple descriptor for the value tuple of the main table,
// |tablePos| is the field index into that value tuple, |tuple| is the value tuple itself, and
// |ns| is the node store handed to index.GetField when reading the field.
//
// The result is nil when the field reads back as nil, a []byte when the field reads back as a
// string or []byte, and the value itself when it is already one of the spatial types listed
// below. Any other type is reported as an error.
func dereferenceGeometry(ctx context.Context, tableValueDescriptor val.TupleDesc, tablePos int, tuple val.Tuple, ns tree.NodeStore) (interface{}, error) {
	field, err := index.GetField(ctx, tableValueDescriptor, tablePos, tuple, ns)
	if err != nil {
		return nil, err
	}
	if field == nil {
		return nil, nil
	}

	// Strings are the only representation that needs converting; everything else passes through.
	if s, isString := field.(string); isString {
		return []byte(s), nil
	}

	switch field.(type) {
	case []byte, sqltypes.Point, sqltypes.LineString, sqltypes.Polygon, sqltypes.MultiPoint, sqltypes.MultiLineString, sqltypes.MultiPolygon, sqltypes.GeometryType, sqltypes.GeomColl:
		return field, nil
	}
	return nil, fmt.Errorf("unexpected type for address encoded content: %T", field)
}
// trimValueToPrefixLength trims |value| by truncating the bytes after |prefixLength|. If |prefixLength|
// is zero or if |value| is nil, then no trimming is done and |value| is directly returned. The
// |encoding| param indicates the original encoding of |value| in the source table.
@@ -91,11 +91,30 @@ func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns tr
err = json.Unmarshal(buf, &doc.Val)
v = doc
}
// TODO: eventually remove this, and only read GeomAddrEnc
case val.GeometryEnc:
var buf []byte
buf, ok = td.GetGeometry(i, tup)
if ok {
v = deserializeGeometry(buf)
v, err = deserializeGeometry(buf)
}
case val.GeomAddrEnc:
// TODO: until GeometryEnc is removed, we must check if GeomAddrEnc is a GeometryEnc
var buf []byte
buf, ok = td.GetGeometry(i, tup)
if ok {
v, err = deserializeGeometry(buf)
}
if !ok || err != nil {
var h hash.Hash
h, ok = td.GetGeometryAddr(i, tup)
if ok {
buf, err = tree.NewByteArray(h, ns).ToBytes(ctx)
if err != nil {
return nil, err
}
v, err = deserializeGeometry(buf)
}
}
case val.Hash128Enc:
v, ok = td.GetHash128(i, tup)
@@ -198,12 +217,21 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
tb.PutByteString(i, v.([]byte))
case val.Hash128Enc:
tb.PutHash128(i, v.([]byte))
// TODO: eventually remove GeometryEnc, but in the meantime write them as GeomAddrEnc
case val.GeometryEnc:
geo := serializeGeometry(v)
if len(geo) > math.MaxUint16 {
return ErrValueExceededMaxFieldSize
h, err := serializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
if err != nil {
return err
}
tb.PutGeometry(i, geo)
tb.PutGeometryAddr(i, h)
case val.GeomAddrEnc:
geo := serializeGeometry(v)
h, err := serializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
if err != nil {
return err
}
tb.PutGeometryAddr(i, h)
case val.JSONAddrEnc:
buf, err := convJson(v)
if err != nil {
@@ -231,7 +259,11 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
tb.PutCommitAddr(i, v.(hash.Hash))
case val.CellEnc:
if _, ok := v.([]byte); ok {
v = deserializeGeometry(v.([]byte))
var err error
v, err = deserializeGeometry(v.([]byte))
if err != nil {
return err
}
}
tb.PutCell(i, ZCell(v.(types.GeometryValue)))
default:
@@ -292,26 +324,29 @@ func convUint(v interface{}) uint {
}
}
func deserializeGeometry(buf []byte) (v interface{}) {
srid, _, typ, _ := types.DeserializeEWKBHeader(buf)
func deserializeGeometry(buf []byte) (v interface{}, err error) {
srid, _, typ, err := types.DeserializeEWKBHeader(buf)
if err != nil {
return nil, err
}
buf = buf[types.EWKBHeaderSize:]
switch typ {
case types.WKBPointID:
v, _, _ = types.DeserializePoint(buf, false, srid)
v, _, err = types.DeserializePoint(buf, false, srid)
case types.WKBLineID:
v, _, _ = types.DeserializeLine(buf, false, srid)
v, _, err = types.DeserializeLine(buf, false, srid)
case types.WKBPolyID:
v, _, _ = types.DeserializePoly(buf, false, srid)
v, _, err = types.DeserializePoly(buf, false, srid)
case types.WKBMultiPointID:
v, _, _ = types.DeserializeMPoint(buf, false, srid)
v, _, err = types.DeserializeMPoint(buf, false, srid)
case types.WKBMultiLineID:
v, _, _ = types.DeserializeMLine(buf, false, srid)
v, _, err = types.DeserializeMLine(buf, false, srid)
case types.WKBMultiPolyID:
v, _, _ = types.DeserializeMPoly(buf, false, srid)
v, _, err = types.DeserializeMPoly(buf, false, srid)
case types.WKBGeomCollID:
v, _, _ = types.DeserializeGeomColl(buf, false, srid)
v, _, err = types.DeserializeGeomColl(buf, false, srid)
default:
panic(fmt.Sprintf("unknown geometry type %d", typ))
return nil, fmt.Errorf("unknown geometry type %d", typ)
}
return
}
@@ -155,17 +155,17 @@ func TestRoundTripProllyFields(t *testing.T) {
},
{
name: "point",
typ: val.Type{Enc: val.GeometryEnc},
typ: val.Type{Enc: val.GeomAddrEnc},
value: mustParseGeometryType(t, "POINT(1 2)"),
},
{
name: "linestring",
typ: val.Type{Enc: val.GeometryEnc},
typ: val.Type{Enc: val.GeomAddrEnc},
value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"),
},
{
name: "polygon",
typ: val.Type{Enc: val.GeometryEnc},
typ: val.Type{Enc: val.GeomAddrEnc},
value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"),
},
{
@@ -244,3 +244,49 @@ func dateFromTime(t time.Time) time.Time {
y, m, d := t.Year(), t.Month(), t.Day()
return time.Date(y, m, d, 0, 0, 0, 0, time.UTC)
}
// TestGeometryEncoding guards backwards compatibility with the old geometry encoding.
//
// Geometries were originally stored in line, but are now stored out of band as BLOBs. Each case
// builds a tuple the old way (PutGeometry under a GeometryEnc descriptor) and checks that
// GetField still decodes it back to the original value.
func TestGeometryEncoding(t *testing.T) {
	cases := []struct {
		name  string
		value interface{}
	}{
		{name: "point", value: mustParseGeometryType(t, "POINT(1 2)")},
		{name: "linestring", value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)")},
		{name: "polygon", value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))")},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctx := context.Background()
			ns := tree.NewTestNodeStore()

			// Write the geometry inline, the way the old encoding stored it.
			legacyDesc := val.NewTupleDescriptor(val.Type{Enc: val.GeometryEnc})
			tb := val.NewTupleBuilder(legacyDesc)
			tb.PutGeometry(0, serializeGeometry(tc.value))
			tup := tb.Build(testPool)

			got, err := GetField(ctx, legacyDesc, 0, tup, ns)
			assert.NoError(t, err)
			assert.Equal(t, tc.value, got)

			// NOTE(review): this second descriptor is also GeometryEnc, so this read repeats the
			// one above; confirm whether a GeomAddrEnc descriptor was intended here.
			secondDesc := val.NewTupleDescriptor(val.Type{Enc: val.GeometryEnc})
			got, err = GetField(ctx, secondDesc, 0, tup, ns)
			assert.NoError(t, err)
			assert.Equal(t, tc.value, got)
		})
	}
}
+1 -1
View File
@@ -91,7 +91,7 @@ func BasicSelectTests() []SelectTest {
var headCommitHash string
switch types.Format_Default {
case types.Format_DOLT:
headCommitHash = "li3mp6hml1bctgon5hptfh9b8rqc1i6a"
headCommitHash = "6665g1bg08efo1sr2ui23iulsc7h22hd"
case types.Format_LD_1:
headCommitHash = "73hc2robs4v0kt9taoe3m5hd49dmrgun"
}
+51 -50
View File
@@ -1,50 +1,51 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace serial;
enum Encoding : uint8 {
// fixed width
Null = 0,
Int8 = 1,
Uint8 = 2,
Int16 = 3,
Uint16 = 4,
Int32 = 7,
Uint32 = 8,
Int64 = 9,
Uint64 = 10,
Float32 = 11,
Float64 = 12,
Bit64 = 13,
Hash128 = 14,
Year = 15,
Date = 16,
Time = 17,
Datetime = 18,
Enum = 19,
Set = 20,
BytesAddr = 21,
CommitAddr = 22,
StringAddr = 23,
JSONAddr = 24,
Cell = 25,
// variable width
String = 128,
Bytes = 129,
Decimal = 130,
JSON = 131,
Geometry = 133,
}
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace serial;
// Encoding enumerates the serialized field encodings, stored as a uint8.
// Members are grouped into fixed-width and variable-width encodings.
enum Encoding : uint8 {
// fixed width
Null = 0,
Int8 = 1,
Uint8 = 2,
Int16 = 3,
Uint16 = 4,
Int32 = 7,
Uint32 = 8,
Int64 = 9,
Uint64 = 10,
Float32 = 11,
Float64 = 12,
Bit64 = 13,
Hash128 = 14,
Year = 15,
Date = 16,
Time = 17,
Datetime = 18,
Enum = 19,
Set = 20,
BytesAddr = 21,
CommitAddr = 22,
StringAddr = 23,
JSONAddr = 24,
Cell = 25,
// GeomAddr is an address reference to a geometry whose content is stored out of band.
GeomAddr = 26,
// variable width
String = 128,
Bytes = 129,
Decimal = 130,
JSON = 131,
// Geometry is the inline geometry encoding that predates GeomAddr.
Geometry = 133,
}
+4
View File
@@ -64,6 +64,7 @@ const (
stringAddrEnc ByteSize = hash.ByteLen
jsonAddrEnc ByteSize = hash.ByteLen
cellSize ByteSize = 17
geomAddrEnc ByteSize = hash.ByteLen
)
type Encoding byte
@@ -94,6 +95,7 @@ const (
StringAddrEnc = Encoding(serial.EncodingStringAddr)
JSONAddrEnc = Encoding(serial.EncodingJSONAddr)
CellEnc = Encoding(serial.EncodingCell)
GeomAddrEnc = Encoding(serial.EncodingGeomAddr)
sentinel Encoding = 127
)
@@ -153,6 +155,8 @@ func sizeFromType(t Type) (ByteSize, bool) {
return stringAddrEnc, true
case JSONAddrEnc:
return jsonAddrEnc, true
case GeomAddrEnc:
return geomAddrEnc, true
default:
return 0, false
}
+7
View File
@@ -331,6 +331,13 @@ func (tb *TupleBuilder) PutGeometry(i int, v []byte) {
tb.pos += sz
}
// PutGeometryAddr writes a Geometry's address ref to the ith field of the Tuple being built.
// The ith field of the builder's descriptor is checked against GeomAddrEnc via expectEncoding,
// and capacity for hash.ByteLen bytes is ensured before |v| is written with putAddr.
func (tb *TupleBuilder) PutGeometryAddr(i int, v hash.Hash) {
tb.Desc.expectEncoding(i, GeomAddrEnc)
tb.ensureCapacity(hash.ByteLen)
tb.putAddr(i, v)
}
// PutHash128 writes a hash128 to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutHash128(i int, v []byte) {
tb.Desc.expectEncoding(i, Hash128Enc)
+8 -1
View File
@@ -430,7 +430,8 @@ func (td TupleDesc) GetJSON(i int, tup Tuple) (v []byte, ok bool) {
// GetGeometry reads a []byte from the ith field of the Tuple.
// If the ith field is NULL, |ok| is set to false.
func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
td.expectEncoding(i, GeometryEnc)
// TODO: we support both Geometry and GeometryAddr for now, so we can't expect just one
// td.expectEncoding(i, GeometryEnc)
b := td.GetField(i, tup)
if b != nil {
v = readByteString(b)
@@ -439,6 +440,12 @@ func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
return
}
// GetGeometryAddr reads a Geometry's address ref from the ith field of the Tuple by delegating
// to getAddr, returning its hash and boolean results unchanged.
func (td TupleDesc) GetGeometryAddr(i int, tup Tuple) (hash.Hash, bool) {
// TODO: we support both Geometry and GeometryAddr for now, so the encoding check below stays
// disabled because we can't expect just one
// td.expectEncoding(i, GeomAddrEnc)
return td.getAddr(i, tup)
}
func (td TupleDesc) GetHash128(i int, tup Tuple) (v []byte, ok bool) {
td.expectEncoding(i, Hash128Enc)
b := td.GetField(i, tup)
File diff suppressed because one or more lines are too long
+1 -1
View File
@@ -81,7 +81,7 @@ assert_feature_version() {
# Tests that don't end in a valid dolt dir will fail the above
# command, don't check its output in that case
if [ "$status" -eq 0 ]; then
[[ "$output" =~ "feature version: 5" ]] || exit 1
[[ "$output" =~ "feature version: 6" ]] || exit 1
else
# Clear status to avoid BATS failing if this is the last run command
status=0
@@ -74,6 +74,16 @@ teardown() {
[[ "$output" =~ "POLYGON((0.123 0.456,1.22 1.33,1.11 0.99,0.123 0.456))" ]] || false
}
# Feeds helper/big_spatial.sql to `dolt sql`, checks that it exits 0 and prints "Query OK",
# then checks that `select count(*) from t` exits 0 and prints output containing "1".
# NOTE(review): presumably the helper script creates t and inserts one geometry too large for
# the old inline encoding -- confirm against helper/big_spatial.sql.
@test "sql-spatial-types: can create large geometry" {
run dolt sql < $BATS_TEST_DIRNAME/helper/big_spatial.sql
[ "$status" -eq 0 ]
[[ "$output" =~ "Query OK" ]] || false
run dolt sql -q "select count(*) from t"
[ "$status" -eq 0 ]
[[ "$output" =~ "1" ]] || false
}
@test "sql-spatial-types: create geometry table and insert existing spatial types" {
# create geometry table