mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-31 03:18:43 -06:00
Merge pull request #5206 from dolthub/james/spatial-enc
adding new codec for spatial indexes
This commit is contained in:
@@ -45,8 +45,9 @@ const (
|
||||
EncodingBytesAddr Encoding = 21
|
||||
EncodingCommitAddr Encoding = 22
|
||||
EncodingStringAddr Encoding = 23
|
||||
EncodingJSONAddr Encoding = 24
|
||||
EncodingString Encoding = 128
|
||||
EncodingJSONAddr Encoding = 24
|
||||
EncodingCell Encoding = 25
|
||||
EncodingString Encoding = 128
|
||||
EncodingBytes Encoding = 129
|
||||
EncodingDecimal Encoding = 130
|
||||
EncodingJSON Encoding = 131
|
||||
@@ -77,6 +78,7 @@ var EnumNamesEncoding = map[Encoding]string{
|
||||
EncodingCommitAddr: "CommitAddr",
|
||||
EncodingStringAddr: "StringAddr",
|
||||
EncodingJSONAddr: "JSONAddr",
|
||||
EncodingCell: "Cell",
|
||||
EncodingString: "String",
|
||||
EncodingBytes: "Bytes",
|
||||
EncodingDecimal: "Decimal",
|
||||
@@ -108,6 +110,7 @@ var EnumValuesEncoding = map[string]Encoding{
|
||||
"CommitAddr": EncodingCommitAddr,
|
||||
"StringAddr": EncodingStringAddr,
|
||||
"JSONAddr": EncodingJSONAddr,
|
||||
"Cell": EncodingCell,
|
||||
"String": EncodingString,
|
||||
"Bytes": EncodingBytes,
|
||||
"Decimal": EncodingDecimal,
|
||||
|
||||
@@ -214,6 +214,8 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
|
||||
tb.PutStringAddr(i, h)
|
||||
case val.CommitAddrEnc:
|
||||
tb.PutCommitAddr(i, v.(hash.Hash))
|
||||
case val.CellEnc:
|
||||
tb.PutCell(i, ZCell(v.(types.GeometryValue)))
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
|
||||
}
|
||||
|
||||
@@ -23,6 +23,8 @@ import (
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql/expression/function/spatial"
|
||||
"github.com/dolthub/go-mysql-server/sql/types"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// LexFloat maps the float64 into an uint64 representation in lexicographical order
|
||||
@@ -150,20 +152,36 @@ func ZSort(points []types.Point) []types.Point {
|
||||
return points
|
||||
}
|
||||
|
||||
// ZAddr converts the GeometryValue into a key: (min_z_val, level)
|
||||
// ZCell converts the GeometryValue into a Cell
|
||||
// Note: there is an inefficiency here where small polygons may be placed into a level that's significantly larger
|
||||
func ZAddr(v types.GeometryValue) [17]byte {
|
||||
func ZCell(v types.GeometryValue) val.Cell {
|
||||
bbox := spatial.FindBBox(v)
|
||||
zMin := ZValue(types.Point{X: bbox[0], Y: bbox[1]})
|
||||
zMax := ZValue(types.Point{X: bbox[2], Y: bbox[3]})
|
||||
|
||||
cell := val.Cell{}
|
||||
binary.BigEndian.PutUint64(cell.ZValue[:], zMin[0])
|
||||
binary.BigEndian.PutUint64(cell.ZValue[8:], zMin[1])
|
||||
if res := zMin[0] ^ zMax[0]; res != 0 {
|
||||
cell.Level = byte(64 - bits.LeadingZeros64(res)/2)
|
||||
} else {
|
||||
cell.Level = byte(32 - bits.LeadingZeros64(zMin[1]^zMax[1])/2)
|
||||
}
|
||||
return cell
|
||||
}
|
||||
|
||||
// ZAddr converts the GeometryValue into a key: (level, min_z_val)
|
||||
func ZAddr(v types.GeometryValue) [17]byte {
|
||||
bbox := spatial.FindBBox(v)
|
||||
zMin := ZValue(types.Point{X: bbox[0], Y: bbox[1]})
|
||||
zMax := ZValue(types.Point{X: bbox[2], Y: bbox[3]})
|
||||
addr := [17]byte{}
|
||||
binary.BigEndian.PutUint64(addr[1:], zMin[0])
|
||||
binary.BigEndian.PutUint64(addr[9:], zMin[1])
|
||||
if res := zMin[0] ^ zMax[0]; res != 0 {
|
||||
addr[0] = byte(64 - bits.LeadingZeros64(res)/2)
|
||||
} else {
|
||||
addr[0] = byte(32 + bits.LeadingZeros64(zMin[1]^zMax[1])/2)
|
||||
addr[0] = byte(32 - bits.LeadingZeros64(zMin[1]^zMax[1])/2)
|
||||
}
|
||||
return addr
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ func TestZAddr(t *testing.T) {
|
||||
t.Run("test points z-addrs", func(t *testing.T) {
|
||||
p := types.Point{X: 1, Y: 2}
|
||||
res := ZAddr(p)
|
||||
assert.Equal(t, "40e5555500000000000000000000000000", hex.EncodeToString(res[:]))
|
||||
assert.Equal(t, "00e5555500000000000000000000000000", hex.EncodeToString(res[:]))
|
||||
})
|
||||
|
||||
t.Run("test linestring z-addrs", func(t *testing.T) {
|
||||
@@ -222,35 +222,35 @@ func TestZAddr(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestZSort(t *testing.T) {
|
||||
p1 := types.LineString{Points: []types.Point{ps[16], ps[19]}}
|
||||
p2 := types.LineString{Points: []types.Point{ps[0], ps[3]}}
|
||||
p3 := types.LineString{Points: []types.Point{ps[19], ps[24]}}
|
||||
p4 := types.LineString{Points: []types.Point{ps[3], ps[19]}}
|
||||
p5 := types.LineString{Points: []types.Point{ps[24], ps[24]}}
|
||||
p1 := types.LineString{Points: []types.Point{ps[24], ps[24]}}
|
||||
p2 := types.LineString{Points: []types.Point{ps[16], ps[19]}}
|
||||
p3 := types.LineString{Points: []types.Point{ps[0], ps[3]}}
|
||||
p4 := types.LineString{Points: []types.Point{ps[19], ps[24]}}
|
||||
p5 := types.LineString{Points: []types.Point{ps[3], ps[19]}}
|
||||
|
||||
t.Run("test z-addr p1", func(t *testing.T) {
|
||||
z := ZAddr(p1) // bbox: (0, 0), (1, 1)
|
||||
assert.Equal(t, "3ec0000000000000000000000000000000", hex.EncodeToString(z[:]))
|
||||
z := ZAddr(p1) // bbox: (2, 2), (2, 2)
|
||||
assert.Equal(t, "00f0000000000000000000000000000000", hex.EncodeToString(z[:]))
|
||||
})
|
||||
|
||||
t.Run("test z-addr p2", func(t *testing.T) {
|
||||
z := ZAddr(p2) // bbox: (-2, -2), (-1, -1)
|
||||
assert.Equal(t, "3f0fffffffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
|
||||
z := ZAddr(p2) // bbox: (0, 0), (1, 1)
|
||||
assert.Equal(t, "3ec0000000000000000000000000000000", hex.EncodeToString(z[:]))
|
||||
})
|
||||
|
||||
t.Run("test z-addr p3", func(t *testing.T) {
|
||||
z := ZAddr(p3) // bbox: (1, 1), (2, 2)
|
||||
assert.Equal(t, "3fcfffff00000000000000000000000000", hex.EncodeToString(z[:]))
|
||||
z := ZAddr(p3) // bbox: (-2, -2), (-1, -1)
|
||||
assert.Equal(t, "3f0fffffffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
|
||||
})
|
||||
|
||||
t.Run("test z-addr p4", func(t *testing.T) {
|
||||
z := ZAddr(p4) // bbox: (-1, -1), (1, 1)
|
||||
assert.Equal(t, "40300000ffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
|
||||
z := ZAddr(p4) // bbox: (1, 1), (2, 2)
|
||||
assert.Equal(t, "3fcfffff00000000000000000000000000", hex.EncodeToString(z[:]))
|
||||
})
|
||||
|
||||
t.Run("test z-addr p6", func(t *testing.T) {
|
||||
z := ZAddr(p5) // bbox: (2, 2), (2, 2)
|
||||
assert.Equal(t, "40f0000000000000000000000000000000", hex.EncodeToString(z[:]))
|
||||
t.Run("test z-addr p5", func(t *testing.T) {
|
||||
z := ZAddr(p5) // bbox: (-1, -1), (1, 1)
|
||||
assert.Equal(t, "40300000ffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
|
||||
})
|
||||
|
||||
t.Run("test z-addr sorting", func(t *testing.T) {
|
||||
|
||||
@@ -39,6 +39,7 @@ enum Encoding : uint8 {
|
||||
CommitAddr = 22,
|
||||
StringAddr = 23,
|
||||
JSONAddr = 24,
|
||||
Cell = 25,
|
||||
|
||||
// variable width
|
||||
String = 128,
|
||||
|
||||
@@ -63,6 +63,7 @@ const (
|
||||
commitAddrEnc ByteSize = hash.ByteLen
|
||||
stringAddrEnc ByteSize = hash.ByteLen
|
||||
jsonAddrEnc ByteSize = hash.ByteLen
|
||||
cellSize ByteSize = 17
|
||||
)
|
||||
|
||||
type Encoding byte
|
||||
@@ -92,6 +93,7 @@ const (
|
||||
CommitAddrEnc = Encoding(serial.EncodingCommitAddr)
|
||||
StringAddrEnc = Encoding(serial.EncodingStringAddr)
|
||||
JSONAddrEnc = Encoding(serial.EncodingJSONAddr)
|
||||
CellEnc = Encoding(serial.EncodingCell)
|
||||
|
||||
sentinel Encoding = 127
|
||||
)
|
||||
@@ -622,3 +624,31 @@ func expectSize(buf []byte, sz ByteSize) {
|
||||
// stringFromBytes reinterprets the slice header of b as a string header and
// returns the resulting string. No bytes are copied, so the returned string
// shares memory with b.
func stringFromBytes(b []byte) string {
	hdr := unsafe.Pointer(&b)
	return *(*string)(hdr)
}
|
||||
|
||||
// Cell is a representation of a subregion for Spatial Indexes.
// Level encodes the size of the region.
// ZValue is the z-value encoding of the minimum point of the bbox of a geometry.
type Cell struct {
	Level  byte
	ZValue [16]byte
}

// compareCell orders two Cells first by Level and then, for equal levels, by
// the byte-wise order of their ZValue. It returns a negative number when l
// sorts before r, zero when they are equal, and a positive number otherwise.
func compareCell(l, r Cell) int {
	// Level is an unsigned byte, so the levels must be compared directly:
	// subtracting them in byte arithmetic wraps around (1 - 2 == 255) and
	// would report the smaller level as the larger one.
	switch {
	case l.Level < r.Level:
		return -1
	case l.Level > r.Level:
		return 1
	}
	return bytes.Compare(l.ZValue[:], r.ZValue[:])
}
|
||||
|
||||
func readCell(val []byte) (res Cell) {
|
||||
expectSize(val, cellSize)
|
||||
res.Level = val[0]
|
||||
copy(res.ZValue[:], val[1:])
|
||||
return
|
||||
}
|
||||
|
||||
func writeCell(buf []byte, v Cell) {
|
||||
expectSize(buf, cellSize)
|
||||
buf[0] = v.Level
|
||||
copy(buf[1:], v.ZValue[:])
|
||||
}
|
||||
|
||||
@@ -240,6 +240,13 @@ func TestCompare(t *testing.T) {
|
||||
l: encStr("b"), r: encStr("a"),
|
||||
cmp: 1,
|
||||
},
|
||||
// z-address
|
||||
{
|
||||
typ: Type{Enc: StringEnc},
|
||||
l: encCell(Cell{}),
|
||||
r: encCell(Cell{}),
|
||||
cmp: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
@@ -297,6 +304,12 @@ func encStr(s string) []byte {
|
||||
return buf
|
||||
}
|
||||
|
||||
func encCell(c Cell) []byte {
|
||||
buf := make([]byte, cellSize)
|
||||
writeCell(buf, c)
|
||||
return buf
|
||||
}
|
||||
|
||||
func encYear(y int16) []byte {
|
||||
buf := make([]byte, yearSize)
|
||||
writeYear(buf, y)
|
||||
|
||||
@@ -366,3 +366,12 @@ func (tb *TupleBuilder) ensureCapacity(sz ByteSize) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PutCell writes a Cell to the ith field of the Tuple being built.
|
||||
func (tb *TupleBuilder) PutCell(i int, v Cell) {
|
||||
tb.Desc.expectEncoding(i, CellEnc)
|
||||
tb.ensureCapacity(cellSize)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+cellSize]
|
||||
writeCell(tb.fields[i], v)
|
||||
tb.pos += cellSize
|
||||
}
|
||||
|
||||
@@ -143,6 +143,8 @@ func compare(typ Type, left, right []byte) int {
|
||||
return compareAddr(readAddr(left), readAddr(right))
|
||||
case StringAddrEnc:
|
||||
return compareAddr(readAddr(left), readAddr(right))
|
||||
case CellEnc:
|
||||
return compareCell(readCell(left), readCell(right))
|
||||
default:
|
||||
panic("unknown encoding")
|
||||
}
|
||||
|
||||
@@ -468,6 +468,16 @@ func (td TupleDesc) expectEncoding(i int, encodings ...Encoding) {
|
||||
panic("incorrect value encoding")
|
||||
}
|
||||
|
||||
func (td TupleDesc) GetCell(i int, tup Tuple) (v Cell, ok bool) {
|
||||
td.expectEncoding(i, CellEnc)
|
||||
b := td.GetField(i, tup)
|
||||
if b != nil {
|
||||
v = readCell(b)
|
||||
ok = true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Format prints a Tuple as a string.
|
||||
func (td TupleDesc) Format(tup Tuple) string {
|
||||
if tup == nil || tup.Count() == 0 {
|
||||
@@ -562,6 +572,8 @@ func formatValue(enc Encoding, value []byte) string {
|
||||
return hex.EncodeToString(value)
|
||||
case CommitAddrEnc:
|
||||
return hex.EncodeToString(value)
|
||||
case CellEnc:
|
||||
return hex.EncodeToString(value)
|
||||
default:
|
||||
return string(value)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user