Merge pull request #5206 from dolthub/james/spatial-enc

adding new codec for spatial indexes
This commit is contained in:
James Cor
2023-01-25 16:30:50 -08:00
committed by GitHub
10 changed files with 112 additions and 22 deletions

View File

@@ -45,8 +45,9 @@ const (
EncodingBytesAddr Encoding = 21
EncodingCommitAddr Encoding = 22
EncodingStringAddr Encoding = 23
EncodingJSONAddr Encoding = 24
EncodingString Encoding = 128
EncodingJSONAddr Encoding = 24
EncodingCell Encoding = 25
EncodingString Encoding = 128
EncodingBytes Encoding = 129
EncodingDecimal Encoding = 130
EncodingJSON Encoding = 131
@@ -77,6 +78,7 @@ var EnumNamesEncoding = map[Encoding]string{
EncodingCommitAddr: "CommitAddr",
EncodingStringAddr: "StringAddr",
EncodingJSONAddr: "JSONAddr",
EncodingCell: "Cell",
EncodingString: "String",
EncodingBytes: "Bytes",
EncodingDecimal: "Decimal",
@@ -108,6 +110,7 @@ var EnumValuesEncoding = map[string]Encoding{
"CommitAddr": EncodingCommitAddr,
"StringAddr": EncodingStringAddr,
"JSONAddr": EncodingJSONAddr,
"Cell": EncodingCell,
"String": EncodingString,
"Bytes": EncodingBytes,
"Decimal": EncodingDecimal,

View File

@@ -214,6 +214,8 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
tb.PutStringAddr(i, h)
case val.CommitAddrEnc:
tb.PutCommitAddr(i, v.(hash.Hash))
case val.CellEnc:
tb.PutCell(i, ZCell(v.(types.GeometryValue)))
default:
panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
}

View File

@@ -23,6 +23,8 @@ import (
"github.com/dolthub/go-mysql-server/sql/expression/function/spatial"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/store/val"
)
// LexFloat maps the float64 into an uint64 representation in lexicographical order
@@ -150,20 +152,36 @@ func ZSort(points []types.Point) []types.Point {
return points
}
// ZAddr converts the GeometryValue into a key: (min_z_val, level)
// ZCell converts the GeometryValue into a Cell
// Note: there is an inefficiency here where small polygons may be placed into a level that's significantly larger
func ZAddr(v types.GeometryValue) [17]byte {
// ZCell builds the val.Cell used to index a geometry.
// The Cell's ZValue is the big-endian z-value of the bounding-box corner
// (bbox[0], bbox[1]); its Level is computed from the most significant bit at
// which the z-values of the two bounding-box corners differ.
func ZCell(v types.GeometryValue) val.Cell {
	bbox := spatial.FindBBox(v)
	lo := ZValue(types.Point{X: bbox[0], Y: bbox[1]})
	hi := ZValue(types.Point{X: bbox[2], Y: bbox[3]})

	var cell val.Cell
	binary.BigEndian.PutUint64(cell.ZValue[:8], lo[0])
	binary.BigEndian.PutUint64(cell.ZValue[8:], lo[1])

	// The corners differ somewhere in the upper 64 bits: level is in (32, 64].
	if diff := lo[0] ^ hi[0]; diff != 0 {
		cell.Level = byte(64 - bits.LeadingZeros64(diff)/2)
		return cell
	}

	// Upper halves are identical, so only the lower 64 bits can differ.
	// Identical corners give LeadingZeros64(0) == 64 and therefore level 0.
	cell.Level = byte(32 - bits.LeadingZeros64(lo[1]^hi[1])/2)
	return cell
}
// ZAddr converts the GeometryValue into a 17-byte key: (level, min_z_val).
// Byte 0 holds the level; bytes 1-16 hold the big-endian z-value of the
// bounding-box corner (bbox[0], bbox[1]).
func ZAddr(v types.GeometryValue) [17]byte {
	bbox := spatial.FindBBox(v)
	zMin := ZValue(types.Point{X: bbox[0], Y: bbox[1]})
	zMax := ZValue(types.Point{X: bbox[2], Y: bbox[3]})

	addr := [17]byte{}
	binary.BigEndian.PutUint64(addr[1:], zMin[0])
	binary.BigEndian.PutUint64(addr[9:], zMin[1])

	if res := zMin[0] ^ zMax[0]; res != 0 {
		// The corners differ in the upper 64 bits of the z-value.
		addr[0] = byte(64 - bits.LeadingZeros64(res)/2)
	} else {
		// Only the lower 64 bits can differ. Identical corners give
		// LeadingZeros64(0) == 64, i.e. level 0. The level is assigned exactly
		// once; the previous "32 +" formula was a dead store here.
		addr[0] = byte(32 - bits.LeadingZeros64(zMin[1]^zMax[1])/2)
	}
	return addr
}

View File

@@ -197,7 +197,7 @@ func TestZAddr(t *testing.T) {
t.Run("test points z-addrs", func(t *testing.T) {
p := types.Point{X: 1, Y: 2}
res := ZAddr(p)
assert.Equal(t, "40e5555500000000000000000000000000", hex.EncodeToString(res[:]))
assert.Equal(t, "00e5555500000000000000000000000000", hex.EncodeToString(res[:]))
})
t.Run("test linestring z-addrs", func(t *testing.T) {
@@ -222,35 +222,35 @@ func TestZAddr(t *testing.T) {
}
func TestZSort(t *testing.T) {
p1 := types.LineString{Points: []types.Point{ps[16], ps[19]}}
p2 := types.LineString{Points: []types.Point{ps[0], ps[3]}}
p3 := types.LineString{Points: []types.Point{ps[19], ps[24]}}
p4 := types.LineString{Points: []types.Point{ps[3], ps[19]}}
p5 := types.LineString{Points: []types.Point{ps[24], ps[24]}}
p1 := types.LineString{Points: []types.Point{ps[24], ps[24]}}
p2 := types.LineString{Points: []types.Point{ps[16], ps[19]}}
p3 := types.LineString{Points: []types.Point{ps[0], ps[3]}}
p4 := types.LineString{Points: []types.Point{ps[19], ps[24]}}
p5 := types.LineString{Points: []types.Point{ps[3], ps[19]}}
t.Run("test z-addr p1", func(t *testing.T) {
z := ZAddr(p1) // bbox: (0, 0), (1, 1)
assert.Equal(t, "3ec0000000000000000000000000000000", hex.EncodeToString(z[:]))
z := ZAddr(p1) // bbox: (2, 2), (2, 2)
assert.Equal(t, "00f0000000000000000000000000000000", hex.EncodeToString(z[:]))
})
t.Run("test z-addr p2", func(t *testing.T) {
z := ZAddr(p2) // bbox: (-2, -2), (-1, -1)
assert.Equal(t, "3f0fffffffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
z := ZAddr(p2) // bbox: (0, 0), (1, 1)
assert.Equal(t, "3ec0000000000000000000000000000000", hex.EncodeToString(z[:]))
})
t.Run("test z-addr p3", func(t *testing.T) {
z := ZAddr(p3) // bbox: (1, 1), (2, 2)
assert.Equal(t, "3fcfffff00000000000000000000000000", hex.EncodeToString(z[:]))
z := ZAddr(p3) // bbox: (-2, -2), (-1, -1)
assert.Equal(t, "3f0fffffffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
})
t.Run("test z-addr p4", func(t *testing.T) {
z := ZAddr(p4) // bbox: (-1, -1), (1, 1)
assert.Equal(t, "40300000ffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
z := ZAddr(p4) // bbox: (1, 1), (2, 2)
assert.Equal(t, "3fcfffff00000000000000000000000000", hex.EncodeToString(z[:]))
})
t.Run("test z-addr p6", func(t *testing.T) {
z := ZAddr(p5) // bbox: (2, 2), (2, 2)
assert.Equal(t, "40f0000000000000000000000000000000", hex.EncodeToString(z[:]))
t.Run("test z-addr p5", func(t *testing.T) {
z := ZAddr(p5) // bbox: (-1, -1), (1, 1)
assert.Equal(t, "40300000ffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
})
t.Run("test z-addr sorting", func(t *testing.T) {

View File

@@ -39,6 +39,7 @@ enum Encoding : uint8 {
CommitAddr = 22,
StringAddr = 23,
JSONAddr = 24,
Cell = 25,
// variable width
String = 128,

View File

@@ -63,6 +63,7 @@ const (
commitAddrEnc ByteSize = hash.ByteLen
stringAddrEnc ByteSize = hash.ByteLen
jsonAddrEnc ByteSize = hash.ByteLen
cellSize ByteSize = 17
)
type Encoding byte
@@ -92,6 +93,7 @@ const (
CommitAddrEnc = Encoding(serial.EncodingCommitAddr)
StringAddrEnc = Encoding(serial.EncodingStringAddr)
JSONAddrEnc = Encoding(serial.EncodingJSONAddr)
CellEnc = Encoding(serial.EncodingCell)
sentinel Encoding = 127
)
@@ -622,3 +624,31 @@ func expectSize(buf []byte, sz ByteSize) {
func stringFromBytes(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}
// Cell is a representation of a subregion for Spatial Indexes.
// Level encodes the size of the region.
// ZValue is the z-value encoding of the minimum point of the bbox of a geometry.
type Cell struct {
	Level  byte
	ZValue [16]byte
}

// compareCell orders two Cells by Level first and, when the levels are equal,
// by the lexicographic order of their ZValue bytes. It returns a negative
// number when l sorts before r, zero when they are equal, and a positive
// number when l sorts after r.
func compareCell(l, r Cell) int {
	if l.Level != r.Level {
		// Widen to int before subtracting: byte subtraction wraps around, so
		// int(l.Level - r.Level) would be positive even when l.Level < r.Level.
		return int(l.Level) - int(r.Level)
	}
	return bytes.Compare(l.ZValue[:], r.ZValue[:])
}
// readCell decodes a Cell from its serialized form: the first byte is the
// Level and the remaining bytes are copied into ZValue. The size of val is
// checked against cellSize via expectSize before decoding.
func readCell(val []byte) (res Cell) {
	expectSize(val, cellSize)
	level, zval := val[0], val[1:]
	res = Cell{Level: level}
	copy(res.ZValue[:], zval)
	return res
}
// writeCell serializes v into buf: the Level goes into the first byte and the
// ZValue bytes follow. The size of buf is checked against cellSize via
// expectSize before writing.
func writeCell(buf []byte, v Cell) {
	expectSize(buf, cellSize)
	level, zval := buf[:1], buf[1:]
	level[0] = v.Level
	copy(zval, v.ZValue[:])
}

View File

@@ -240,6 +240,13 @@ func TestCompare(t *testing.T) {
l: encStr("b"), r: encStr("a"),
cmp: 1,
},
// z-address: encoded Cells must be compared under CellEnc so that the
// CellEnc branch of compare (and compareCell) is exercised; under StringEnc
// the bytes were compared as a string and the Cell path was never tested.
{
	typ: Type{Enc: CellEnc},
	l:   encCell(Cell{}),
	r:   encCell(Cell{}),
	cmp: 0,
},
}
for _, test := range tests {
@@ -297,6 +304,12 @@ func encStr(s string) []byte {
return buf
}
// encCell returns a freshly allocated cellSize-byte slice holding the
// serialized form of c, as produced by writeCell.
func encCell(c Cell) []byte {
	encoded := make([]byte, cellSize)
	writeCell(encoded, c)
	return encoded
}
func encYear(y int16) []byte {
buf := make([]byte, yearSize)
writeYear(buf, y)

View File

@@ -366,3 +366,12 @@ func (tb *TupleBuilder) ensureCapacity(sz ByteSize) {
}
}
}
// PutCell writes a Cell to the ith field of the Tuple being built.
// It checks that field i is declared with CellEnc, reserves cellSize bytes in
// the builder's buffer, serializes v into them and advances the write position.
func (tb *TupleBuilder) PutCell(i int, v Cell) {
	tb.Desc.expectEncoding(i, CellEnc)
	tb.ensureCapacity(cellSize)

	start := tb.pos
	end := start + cellSize
	field := tb.buf[start:end]

	tb.fields[i] = field
	writeCell(field, v)
	tb.pos = end
}

View File

@@ -143,6 +143,8 @@ func compare(typ Type, left, right []byte) int {
return compareAddr(readAddr(left), readAddr(right))
case StringAddrEnc:
return compareAddr(readAddr(left), readAddr(right))
case CellEnc:
return compareCell(readCell(left), readCell(right))
default:
panic("unknown encoding")
}

View File

@@ -468,6 +468,16 @@ func (td TupleDesc) expectEncoding(i int, encodings ...Encoding) {
panic("incorrect value encoding")
}
// GetCell reads a Cell from the ith field of the Tuple.
// It checks that field i is declared with CellEnc. If the field's bytes are
// nil, it returns the zero Cell and ok == false; otherwise it returns the
// decoded Cell and ok == true.
func (td TupleDesc) GetCell(i int, tup Tuple) (v Cell, ok bool) {
	td.expectEncoding(i, CellEnc)
	field := td.GetField(i, tup)
	if field == nil {
		return v, false
	}
	return readCell(field), true
}
// Format prints a Tuple as a string.
func (td TupleDesc) Format(tup Tuple) string {
if tup == nil || tup.Count() == 0 {
@@ -562,6 +572,8 @@ func formatValue(enc Encoding, value []byte) string {
return hex.EncodeToString(value)
case CommitAddrEnc:
return hex.EncodeToString(value)
case CellEnc:
return hex.EncodeToString(value)
default:
return string(value)
}