mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-01 20:00:22 -05:00
store GEOMETRY types as BLOBs (#6933)
This commit is contained in:
@@ -47,6 +47,7 @@ const (
|
||||
EncodingStringAddr Encoding = 23
|
||||
EncodingJSONAddr Encoding = 24
|
||||
EncodingCell Encoding = 25
|
||||
EncodingGeomAddr Encoding = 26
|
||||
EncodingString Encoding = 128
|
||||
EncodingBytes Encoding = 129
|
||||
EncodingDecimal Encoding = 130
|
||||
@@ -79,6 +80,7 @@ var EnumNamesEncoding = map[Encoding]string{
|
||||
EncodingStringAddr: "StringAddr",
|
||||
EncodingJSONAddr: "JSONAddr",
|
||||
EncodingCell: "Cell",
|
||||
EncodingGeomAddr: "GeomAddr",
|
||||
EncodingString: "String",
|
||||
EncodingBytes: "Bytes",
|
||||
EncodingDecimal: "Decimal",
|
||||
@@ -111,6 +113,7 @@ var EnumValuesEncoding = map[string]Encoding{
|
||||
"StringAddr": EncodingStringAddr,
|
||||
"JSONAddr": EncodingJSONAddr,
|
||||
"Cell": EncodingCell,
|
||||
"GeomAddr": EncodingGeomAddr,
|
||||
"String": EncodingString,
|
||||
"Bytes": EncodingBytes,
|
||||
"Decimal": EncodingDecimal,
|
||||
|
||||
@@ -50,7 +50,7 @@ type FeatureVersion int64
|
||||
|
||||
// DoltFeatureVersion is described in feature_version.md.
|
||||
// only variable for testing.
|
||||
var DoltFeatureVersion FeatureVersion = 5 // last bumped when adding virtual columns to schema storage
|
||||
var DoltFeatureVersion FeatureVersion = 6 // last bumped when changing geometry types to be stored as BLOBs
|
||||
|
||||
// RootValue is the value of the Database and is the committed value in every Dolt commit.
|
||||
type RootValue struct {
|
||||
|
||||
@@ -76,7 +76,7 @@ func EncodingFromSqlType(typ query.Type) serial.Encoding {
|
||||
case query.Type_VARCHAR:
|
||||
return serial.EncodingString
|
||||
case query.Type_GEOMETRY:
|
||||
return serial.EncodingGeometry
|
||||
return serial.EncodingGeomAddr
|
||||
case query.Type_JSON:
|
||||
return serial.EncodingJSONAddr
|
||||
case query.Type_BLOB:
|
||||
|
||||
@@ -207,9 +207,13 @@ func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Ind
|
||||
return err
|
||||
}
|
||||
} else if def.IsSpatial() {
|
||||
geom, _, err := sqltypes.GeometryType{}.Convert(field[:len(field)-1])
|
||||
geom, err := dereferenceGeometry(ctx, vd, j+1, value, secondary.NodeStore())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
return err
|
||||
}
|
||||
geom, _, err = sqltypes.GeometryType{}.Convert(geom)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cell := index.ZCell(geom.(sqltypes.GeometryValue))
|
||||
field = cell[:]
|
||||
@@ -298,9 +302,13 @@ func validatePkIndex(ctx context.Context, sch schema.Schema, def schema.Index, p
|
||||
return err
|
||||
}
|
||||
} else if def.IsSpatial() {
|
||||
geom, _, err := sqltypes.GeometryType{}.Convert(field[:len(field)-1])
|
||||
geom, err := dereferenceGeometry(ctx, vd, j-pkSize, value, secondary.NodeStore())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
return err
|
||||
}
|
||||
geom, _, err = sqltypes.GeometryType{}.Convert(geom)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cell := index.ZCell(geom.(sqltypes.GeometryValue))
|
||||
field = cell[:]
|
||||
@@ -379,6 +387,31 @@ func dereferenceContent(ctx context.Context, tableValueDescriptor val.TupleDesc,
|
||||
}
|
||||
}
|
||||
|
||||
// dereferenceGeometry dereferences an address encoded geometry field to load the content
|
||||
// and return a GeometryType. |tableValueDescriptor| is the tuple descriptor for the value tuple of the main
|
||||
// table, |tablePos| is the field index into the value tuple, and |tuple| is the value tuple from the
|
||||
// main table.
|
||||
func dereferenceGeometry(ctx context.Context, tableValueDescriptor val.TupleDesc, tablePos int, tuple val.Tuple, ns tree.NodeStore) (interface{}, error) {
|
||||
v, err := index.GetField(ctx, tableValueDescriptor, tablePos, tuple, ns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if v == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch x := v.(type) {
|
||||
case string:
|
||||
return []byte(x), nil
|
||||
case []byte:
|
||||
return x, nil
|
||||
case sqltypes.Point, sqltypes.LineString, sqltypes.Polygon, sqltypes.MultiPoint, sqltypes.MultiLineString, sqltypes.MultiPolygon, sqltypes.GeometryType, sqltypes.GeomColl:
|
||||
return x, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected type for address encoded content: %T", v)
|
||||
}
|
||||
}
|
||||
|
||||
// trimValueToPrefixLength trims |value| by truncating the bytes after |prefixLength|. If |prefixLength|
|
||||
// is zero or if |value| is nil, then no trimming is done and |value| is directly returned. The
|
||||
// |encoding| param indicates the original encoding of |value| in the source table.
|
||||
|
||||
@@ -91,11 +91,30 @@ func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns tr
|
||||
err = json.Unmarshal(buf, &doc.Val)
|
||||
v = doc
|
||||
}
|
||||
// TODO: eventually remove this, and only read GeomAddrEnc
|
||||
case val.GeometryEnc:
|
||||
var buf []byte
|
||||
buf, ok = td.GetGeometry(i, tup)
|
||||
if ok {
|
||||
v = deserializeGeometry(buf)
|
||||
v, err = deserializeGeometry(buf)
|
||||
}
|
||||
case val.GeomAddrEnc:
|
||||
// TODO: until GeometryEnc is removed, a GeomAddrEnc field may still hold an inline GeometryEnc value, so try reading it inline first and fall back to the address
|
||||
var buf []byte
|
||||
buf, ok = td.GetGeometry(i, tup)
|
||||
if ok {
|
||||
v, err = deserializeGeometry(buf)
|
||||
}
|
||||
if !ok || err != nil {
|
||||
var h hash.Hash
|
||||
h, ok = td.GetGeometryAddr(i, tup)
|
||||
if ok {
|
||||
buf, err = tree.NewByteArray(h, ns).ToBytes(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
v, err = deserializeGeometry(buf)
|
||||
}
|
||||
}
|
||||
case val.Hash128Enc:
|
||||
v, ok = td.GetHash128(i, tup)
|
||||
@@ -198,12 +217,21 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
|
||||
tb.PutByteString(i, v.([]byte))
|
||||
case val.Hash128Enc:
|
||||
tb.PutHash128(i, v.([]byte))
|
||||
// TODO: eventually remove GeometryEnc, but in the meantime write them as GeomAddrEnc
|
||||
case val.GeometryEnc:
|
||||
geo := serializeGeometry(v)
|
||||
if len(geo) > math.MaxUint16 {
|
||||
return ErrValueExceededMaxFieldSize
|
||||
h, err := serializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tb.PutGeometry(i, geo)
|
||||
tb.PutGeometryAddr(i, h)
|
||||
case val.GeomAddrEnc:
|
||||
geo := serializeGeometry(v)
|
||||
h, err := serializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tb.PutGeometryAddr(i, h)
|
||||
case val.JSONAddrEnc:
|
||||
buf, err := convJson(v)
|
||||
if err != nil {
|
||||
@@ -231,7 +259,11 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
|
||||
tb.PutCommitAddr(i, v.(hash.Hash))
|
||||
case val.CellEnc:
|
||||
if _, ok := v.([]byte); ok {
|
||||
v = deserializeGeometry(v.([]byte))
|
||||
var err error
|
||||
v, err = deserializeGeometry(v.([]byte))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
tb.PutCell(i, ZCell(v.(types.GeometryValue)))
|
||||
default:
|
||||
@@ -292,26 +324,29 @@ func convUint(v interface{}) uint {
|
||||
}
|
||||
}
|
||||
|
||||
func deserializeGeometry(buf []byte) (v interface{}) {
|
||||
srid, _, typ, _ := types.DeserializeEWKBHeader(buf)
|
||||
func deserializeGeometry(buf []byte) (v interface{}, err error) {
|
||||
srid, _, typ, err := types.DeserializeEWKBHeader(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
buf = buf[types.EWKBHeaderSize:]
|
||||
switch typ {
|
||||
case types.WKBPointID:
|
||||
v, _, _ = types.DeserializePoint(buf, false, srid)
|
||||
v, _, err = types.DeserializePoint(buf, false, srid)
|
||||
case types.WKBLineID:
|
||||
v, _, _ = types.DeserializeLine(buf, false, srid)
|
||||
v, _, err = types.DeserializeLine(buf, false, srid)
|
||||
case types.WKBPolyID:
|
||||
v, _, _ = types.DeserializePoly(buf, false, srid)
|
||||
v, _, err = types.DeserializePoly(buf, false, srid)
|
||||
case types.WKBMultiPointID:
|
||||
v, _, _ = types.DeserializeMPoint(buf, false, srid)
|
||||
v, _, err = types.DeserializeMPoint(buf, false, srid)
|
||||
case types.WKBMultiLineID:
|
||||
v, _, _ = types.DeserializeMLine(buf, false, srid)
|
||||
v, _, err = types.DeserializeMLine(buf, false, srid)
|
||||
case types.WKBMultiPolyID:
|
||||
v, _, _ = types.DeserializeMPoly(buf, false, srid)
|
||||
v, _, err = types.DeserializeMPoly(buf, false, srid)
|
||||
case types.WKBGeomCollID:
|
||||
v, _, _ = types.DeserializeGeomColl(buf, false, srid)
|
||||
v, _, err = types.DeserializeGeomColl(buf, false, srid)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown geometry type %d", typ))
|
||||
return nil, fmt.Errorf("unknown geometry type %d", typ)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -155,17 +155,17 @@ func TestRoundTripProllyFields(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "point",
|
||||
typ: val.Type{Enc: val.GeometryEnc},
|
||||
typ: val.Type{Enc: val.GeomAddrEnc},
|
||||
value: mustParseGeometryType(t, "POINT(1 2)"),
|
||||
},
|
||||
{
|
||||
name: "linestring",
|
||||
typ: val.Type{Enc: val.GeometryEnc},
|
||||
typ: val.Type{Enc: val.GeomAddrEnc},
|
||||
value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"),
|
||||
},
|
||||
{
|
||||
name: "polygon",
|
||||
typ: val.Type{Enc: val.GeometryEnc},
|
||||
typ: val.Type{Enc: val.GeomAddrEnc},
|
||||
value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"),
|
||||
},
|
||||
{
|
||||
@@ -244,3 +244,49 @@ func dateFromTime(t time.Time) time.Time {
|
||||
y, m, d := t.Year(), t.Month(), t.Day()
|
||||
return time.Date(y, m, d, 0, 0, 0, 0, time.UTC)
|
||||
}
|
||||
|
||||
// TestGeometryEncoding contains tests that ensure backwards compatibility with the old geometry encoding.
|
||||
//
|
||||
// Initially, geometries were stored inline, but now they are stored out of band as BLOBs.
|
||||
func TestGeometryEncoding(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
value interface{}
|
||||
}{
|
||||
{
|
||||
name: "point",
|
||||
value: mustParseGeometryType(t, "POINT(1 2)"),
|
||||
},
|
||||
{
|
||||
name: "linestring",
|
||||
value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"),
|
||||
},
|
||||
{
|
||||
name: "polygon",
|
||||
value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
ns := tree.NewTestNodeStore()
|
||||
oldDesc := val.NewTupleDescriptor(val.Type{Enc: val.GeometryEnc})
|
||||
builder := val.NewTupleBuilder(oldDesc)
|
||||
b := serializeGeometry(test.value)
|
||||
builder.PutGeometry(0, b)
|
||||
tup := builder.Build(testPool)
|
||||
|
||||
var v interface{}
|
||||
var err error
|
||||
|
||||
v, err = GetField(context.Background(), oldDesc, 0, tup, ns)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.value, v)
|
||||
|
||||
newDesc := val.NewTupleDescriptor(val.Type{Enc: val.GeometryEnc})
|
||||
v, err = GetField(context.Background(), newDesc, 0, tup, ns)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.value, v)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ func BasicSelectTests() []SelectTest {
|
||||
var headCommitHash string
|
||||
switch types.Format_Default {
|
||||
case types.Format_DOLT:
|
||||
headCommitHash = "li3mp6hml1bctgon5hptfh9b8rqc1i6a"
|
||||
headCommitHash = "6665g1bg08efo1sr2ui23iulsc7h22hd"
|
||||
case types.Format_LD_1:
|
||||
headCommitHash = "73hc2robs4v0kt9taoe3m5hd49dmrgun"
|
||||
}
|
||||
|
||||
+51
-50
@@ -1,50 +1,51 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
namespace serial;
|
||||
|
||||
enum Encoding : uint8 {
|
||||
// fixed width
|
||||
Null = 0,
|
||||
Int8 = 1,
|
||||
Uint8 = 2,
|
||||
Int16 = 3,
|
||||
Uint16 = 4,
|
||||
Int32 = 7,
|
||||
Uint32 = 8,
|
||||
Int64 = 9,
|
||||
Uint64 = 10,
|
||||
Float32 = 11,
|
||||
Float64 = 12,
|
||||
Bit64 = 13,
|
||||
Hash128 = 14,
|
||||
Year = 15,
|
||||
Date = 16,
|
||||
Time = 17,
|
||||
Datetime = 18,
|
||||
Enum = 19,
|
||||
Set = 20,
|
||||
BytesAddr = 21,
|
||||
CommitAddr = 22,
|
||||
StringAddr = 23,
|
||||
JSONAddr = 24,
|
||||
Cell = 25,
|
||||
|
||||
// variable width
|
||||
String = 128,
|
||||
Bytes = 129,
|
||||
Decimal = 130,
|
||||
JSON = 131,
|
||||
Geometry = 133,
|
||||
}
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
namespace serial;
|
||||
|
||||
enum Encoding : uint8 {
|
||||
// fixed width
|
||||
Null = 0,
|
||||
Int8 = 1,
|
||||
Uint8 = 2,
|
||||
Int16 = 3,
|
||||
Uint16 = 4,
|
||||
Int32 = 7,
|
||||
Uint32 = 8,
|
||||
Int64 = 9,
|
||||
Uint64 = 10,
|
||||
Float32 = 11,
|
||||
Float64 = 12,
|
||||
Bit64 = 13,
|
||||
Hash128 = 14,
|
||||
Year = 15,
|
||||
Date = 16,
|
||||
Time = 17,
|
||||
Datetime = 18,
|
||||
Enum = 19,
|
||||
Set = 20,
|
||||
BytesAddr = 21,
|
||||
CommitAddr = 22,
|
||||
StringAddr = 23,
|
||||
JSONAddr = 24,
|
||||
Cell = 25,
|
||||
GeomAddr = 26,
|
||||
|
||||
// variable width
|
||||
String = 128,
|
||||
Bytes = 129,
|
||||
Decimal = 130,
|
||||
JSON = 131,
|
||||
Geometry = 133,
|
||||
}
|
||||
|
||||
@@ -64,6 +64,7 @@ const (
|
||||
stringAddrEnc ByteSize = hash.ByteLen
|
||||
jsonAddrEnc ByteSize = hash.ByteLen
|
||||
cellSize ByteSize = 17
|
||||
geomAddrEnc ByteSize = hash.ByteLen
|
||||
)
|
||||
|
||||
type Encoding byte
|
||||
@@ -94,6 +95,7 @@ const (
|
||||
StringAddrEnc = Encoding(serial.EncodingStringAddr)
|
||||
JSONAddrEnc = Encoding(serial.EncodingJSONAddr)
|
||||
CellEnc = Encoding(serial.EncodingCell)
|
||||
GeomAddrEnc = Encoding(serial.EncodingGeomAddr)
|
||||
|
||||
sentinel Encoding = 127
|
||||
)
|
||||
@@ -153,6 +155,8 @@ func sizeFromType(t Type) (ByteSize, bool) {
|
||||
return stringAddrEnc, true
|
||||
case JSONAddrEnc:
|
||||
return jsonAddrEnc, true
|
||||
case GeomAddrEnc:
|
||||
return geomAddrEnc, true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
|
||||
@@ -331,6 +331,13 @@ func (tb *TupleBuilder) PutGeometry(i int, v []byte) {
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
// PutGeometryAddr writes a Geometry's address ref to the ith field
|
||||
func (tb *TupleBuilder) PutGeometryAddr(i int, v hash.Hash) {
|
||||
tb.Desc.expectEncoding(i, GeomAddrEnc)
|
||||
tb.ensureCapacity(hash.ByteLen)
|
||||
tb.putAddr(i, v)
|
||||
}
|
||||
|
||||
// PutHash128 writes a hash128 to the ith field of the Tuple being built.
|
||||
func (tb *TupleBuilder) PutHash128(i int, v []byte) {
|
||||
tb.Desc.expectEncoding(i, Hash128Enc)
|
||||
|
||||
@@ -430,7 +430,8 @@ func (td TupleDesc) GetJSON(i int, tup Tuple) (v []byte, ok bool) {
|
||||
// GetGeometry reads a []byte from the ith field of the Tuple.
|
||||
// If the ith field is NULL, |ok| is set to false.
|
||||
func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
|
||||
td.expectEncoding(i, GeometryEnc)
|
||||
// TODO: we support both Geometry and GeometryAddr for now, so we can't expect just one
|
||||
// td.expectEncoding(i, GeometryEnc)
|
||||
b := td.GetField(i, tup)
|
||||
if b != nil {
|
||||
v = readByteString(b)
|
||||
@@ -439,6 +440,12 @@ func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
|
||||
return
|
||||
}
|
||||
|
||||
func (td TupleDesc) GetGeometryAddr(i int, tup Tuple) (hash.Hash, bool) {
|
||||
// TODO: we support both Geometry and GeometryAddr for now, so we can't expect just one
|
||||
// td.expectEncoding(i, GeomAddrEnc)
|
||||
return td.getAddr(i, tup)
|
||||
}
|
||||
|
||||
func (td TupleDesc) GetHash128(i int, tup Tuple) (v []byte, ok bool) {
|
||||
td.expectEncoding(i, Hash128Enc)
|
||||
b := td.GetField(i, tup)
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -81,7 +81,7 @@ assert_feature_version() {
|
||||
# Tests that don't end in a valid dolt dir will fail the above
|
||||
# command, don't check its output in that case
|
||||
if [ "$status" -eq 0 ]; then
|
||||
[[ "$output" =~ "feature version: 5" ]] || exit 1
|
||||
[[ "$output" =~ "feature version: 6" ]] || exit 1
|
||||
else
|
||||
# Clear status to avoid BATS failing if this is the last run command
|
||||
status=0
|
||||
|
||||
@@ -74,6 +74,16 @@ teardown() {
|
||||
[[ "$output" =~ "POLYGON((0.123 0.456,1.22 1.33,1.11 0.99,0.123 0.456))" ]] || false
|
||||
}
|
||||
|
||||
@test "sql-spatial-types: can create large geometry" {
|
||||
run dolt sql < $BATS_TEST_DIRNAME/helper/big_spatial.sql
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Query OK" ]] || false
|
||||
|
||||
run dolt sql -q "select count(*) from t"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "1" ]] || false
|
||||
}
|
||||
|
||||
@test "sql-spatial-types: create geometry table and insert existing spatial types" {
|
||||
|
||||
# create geometry table
|
||||
|
||||
Reference in New Issue
Block a user