Merge branch 'james/zaddr' into james/spatial-enc

This commit is contained in:
James Cor
2023-01-24 15:29:54 -08:00
2 changed files with 199 additions and 76 deletions

View File

@@ -17,6 +17,7 @@ package index
import (
"bytes"
"math"
"math/bits"
"sort"
"github.com/dolthub/go-mysql-server/sql/expression/function/spatial"
@@ -46,45 +47,85 @@ func UnLexFloat(b uint64) float64 {
return math.Float64frombits(b)
}
// ZValue takes a Point and interleaves the bits into a [16]byte
// It will put the bits in this order: x_0, y_0, x_1, y_1 ... x_63, Y_63
func ZValue(p types.Point) [16]byte {
xLex := LexFloat(p.X)
yLex := LexFloat(p.Y)
// masks holds the bit masks used while spreading bits apart in
// InterleaveUInt64 (walked from first to last) and while gathering them back
// together in UnInterleaveUint64 (walked from last to first).
// masks[i] is always applied together with shifts[i].
var masks = []uint64{
	0x0000FFFF0000FFFF, // alternating 16-bit groups
	0x00FF00FF00FF00FF, // alternating 8-bit groups
	0x0F0F0F0F0F0F0F0F, // alternating 4-bit groups
	0x3333333333333333, // alternating 2-bit groups
	0x5555555555555555, // every other bit
}
res := [16]byte{}
for i := 0; i < 16; i++ {
for j := 0; j < 4; j++ {
x, y := byte((xLex&1)<<1), byte(yLex&1)
res[15-i] |= (x | y) << (2 * j)
xLex, yLex = xLex>>1, yLex>>1
}
// shifts holds the shift distances used by InterleaveUInt64 (as left shifts)
// and UnInterleaveUint64 (as right shifts); shifts[i] is always applied
// together with masks[i].
var shifts = []uint64{
	16,
	8,
	4,
	2,
	1,
}
// InterleaveUInt64 interleaves the low 32 bits of x with the low 32 bits of y
// into a single uint64. The bits of x end up on the even bit positions of the
// result and the bits of y on the odd bit positions (y is shifted left by one
// before the two are combined).
// The upper 32 bits of both x and y must be 0.
// Each loop iteration spreads the bits further apart by OR-ing the value with
// itself shifted left by shifts[i] and then masking with masks[i].
// Example of the spreading applied to one input (each letter is one bit; the
// operation at the end of a row is the one that produced it):
// 0000 0000 0000 0000 0000 0000 0000 0000 abcd efgh ijkl mnop abcd efgh ijkl mnop   input
// 0000 0000 0000 0000 abcd efgh ijkl mnop 0000 0000 0000 0000 abcd efgh ijkl mnop   (x | x << 16) & 0x0000FFFF0000FFFF
// 0000 0000 abcd efgh 0000 0000 ijkl mnop 0000 0000 abcd efgh 0000 0000 ijkl mnop   (x | x << 8) & 0x00FF00FF00FF00FF
// 0000 abcd 0000 efgh 0000 ijkl 0000 mnop 0000 abcd 0000 efgh 0000 ijkl 0000 mnop   (x | x << 4) & 0x0F0F0F0F0F0F0F0F
// 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op   (x | x << 2) & 0x3333333333333333
// 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p   (x | x << 1) & 0x5555555555555555
// Alternatively, the spread results could be precomputed in a lookup table.
// NOTE(review): the original note gave the table range as 0 to 0x0000FFFFF,
// which looks like a typo (one F too many) - confirm the intended range.
func InterleaveUInt64(x, y uint64) uint64 {
for i := 0; i < 5; i++ {
x = (x | (x << shifts[i])) & masks[i]
y = (y | (y << shifts[i])) & masks[i]
}
return res
return x | (y << 1)
}
// UnZValue takes a [16]byte Z-Value and converts it back to a sql.Point
func UnZValue(z [16]byte) types.Point {
var x, y uint64
for i := 15; i >= 0; i-- {
zv := uint64(z[i])
for j := 3; j >= 0; j-- {
y |= (zv & 1) << (63 - (4*i + j))
zv >>= 1
// ZValue encodes the Point p as a 128-bit Z-value stored in a [2]uint64.
// Both coordinates are first passed through LexFloat. z[0] is the
// interleaving of the upper 32 bits of the two encoded coordinates and z[1]
// is the interleaving of their lower 32 bits, so z[0] is the more significant
// word. Within each word the x bits sit on the even bit positions and the y
// bits on the odd bit positions (see InterleaveUInt64).
func ZValue(p types.Point) (z [2]uint64) {
	const low32 = 0xFFFFFFFF
	lexX := LexFloat(p.X)
	lexY := LexFloat(p.Y)
	z[0] = InterleaveUInt64(lexX>>32, lexY>>32)
	z[1] = InterleaveUInt64(lexX&low32, lexY&low32)
	return z
}
x |= (zv & 1) << (63 - (4*i + j))
zv >>= 1
}
// UnInterleaveUint64 splits up the bits of the uint64 z into two uint64s:
// x is gathered from the even bit positions of z and y from the odd bit
// positions (y starts from z shifted right by one).
// The upper 32 bits of the returned x and y are always 0.
// Each loop iteration masks with masks[i] and then ORs the value with itself
// shifted right by shifts[i], walking both tables from last to first.
// Example for x (each letter is one bit of z; the operation at the end of a
// row is the one that produced it):
// abcd efgh ijkl mnop abcd efgh ijkl mnop abcd efgh ijkl mnop abcd efgh ijkl mnop   x = z
// 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p 0b0d 0f0h 0j0l 0n0p   x &= 0x5555555555555555
// 0bbd dffh hjjl lnnp pbbd dffh hjjl lnnp pbbd dffh hjjl lnnp pbbd dffh hjjl lnnp   x |= x >> 1
// 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np 00bd 00fh 00jl 00np   x &= 0x3333333333333333
// 00bd bdfh fhjl jlnp npbd bdfh fhjl jlnp npbd bdfh fhjl jlnp npbd bdfh fhjl jlnp   x |= x >> 2
// 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp 0000 bdfh 0000 jlnp   x &= 0x0F0F0F0F0F0F0F0F
// 0000 bdfh bdfh jlnp jlnp bdfh bdfh jlnp jlnp bdfh bdfh jlnp jlnp bdfh bdfh jlnp   x |= x >> 4
// 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp 0000 0000 bdfh jlnp   x &= 0x00FF00FF00FF00FF
// 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp   x |= x >> 8
// 0000 0000 0000 0000 bdfh jlnp bdfh jlnp 0000 0000 0000 0000 bdfh jlnp bdfh jlnp   x &= 0x0000FFFF0000FFFF
// 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp   x |= x >> 16
// 0000 0000 0000 0000 0000 0000 0000 0000 bdfh jlnp bdfh jlnp bdfh jlnp bdfh jlnp   x &= 0x00000000FFFFFFFF
func UnInterleaveUint64(z uint64) (x, y uint64) {
x, y = z, z>>1
for i := 4; i >= 0; i-- {
x &= masks[i]
x |= x >> shifts[i]
y &= masks[i]
y |= y >> shifts[i]
}
xf := UnLexFloat(x)
yf := UnLexFloat(y)
// Drop the leftover copies above bit 31 so only the gathered 32 bits remain.
x &= 0xFFFFFFFF
y &= 0xFFFFFFFF
return
}
// UnZValue decodes a [2]uint64 Z-value back into a types.Point.
// z[0] supplies the upper 32 bits of each encoded coordinate and z[1] the
// lower 32 bits; the reassembled 64-bit values are passed through UnLexFloat
// to recover the X and Y coordinates.
func UnZValue(z [2]uint64) types.Point {
	xHigh, yHigh := UnInterleaveUint64(z[0])
	xLow, yLow := UnInterleaveUint64(z[1])
	return types.Point{
		X: UnLexFloat(xHigh<<32 | xLow),
		Y: UnLexFloat(yHigh<<32 | yLow),
	}
}
// ZSort sorts points in place by their Z-values (see ZValue), smallest first,
// and returns the same slice.
func ZSort(points []types.Point) []types.Point {
sort.Slice(points, func(i, j int) bool {
zi, zj := ZValue(points[i]), ZValue(points[j])
return bytes.Compare(zi[:], zj[:]) < 0
// Compare the first word of the Z-value and fall back to the second on a tie.
if zi[0] == zj[0] {
return zi[1] < zj[1]
}
return zi[0] < zj[0]
})
return points
}
@@ -97,26 +138,25 @@ func ZAddr(v types.GeometryValue) [17]byte {
zMax := ZValue(types.Point{X: bbox[2], Y: bbox[3]})
addr := [17]byte{}
for i := 0; i < 16; i++ {
addr[i] = zMin[i]
}
// TODO: 64 levels sufficient?
var level uint8
for i := uint8(0); i < 16; i++ {
match := zMin[i] ^ zMax[i]
if match == 0 {
continue
for i := 0; i < 2; i++ {
for j := 0; j < 8; j++ {
addr[8*i+j+1] = byte((zMin[i] >> (8 * (7 - j))) & 0xFF)
}
var mask uint8 = 0x80
for j := uint8(0); j < 8; j++ {
if mask&match == 1 {
level = 8*i + j
}
mask = mask >> 1
}
break
}
addr[16] = level
if res := zMin[0] ^ zMax[0]; res != 0 {
addr[0] = byte(64 - bits.LeadingZeros64(res) / 2)
} else {
addr[0] = byte(32 + bits.LeadingZeros64(zMin[1]^zMax[1]))
}
return addr
}
// ZAddrSort sorts geoms in place by the bytewise order of their ZAddr keys,
// smallest key first, and returns the same slice.
// Note: there is an inefficiency here where small polygons may be placed into a level that's significantly larger
func ZAddrSort(geoms []types.GeometryValue) []types.GeometryValue {
	byAddr := func(a, b int) bool {
		left := ZAddr(geoms[a])
		right := ZAddr(geoms[b])
		return bytes.Compare(left[:], right[:]) < 0
	}
	sort.Slice(geoms, byAddr)
	return geoms
}

View File

@@ -25,6 +25,35 @@ import (
assert "github.com/stretchr/testify/require"
)
// ps is the 5x5 grid of points with integer coordinates in [-2, 2], shared by
// the tests in this file. The points are listed in what is intended to be
// ascending Z-value order (NOTE(review): not checked here - confirm against
// ZValue). The trailing numbers mark every fourth slice index.
var ps = []types.Point{
{X: -2, Y: -2}, // 0
{X: -1, Y: -2},
{X: -2, Y: -1},
{X: -1, Y: -1},
{X: 0, Y: -2}, // 4
{X: 1, Y: -2},
{X: 2, Y: -2},
{X: 0, Y: -1},
{X: 1, Y: -1}, // 8
{X: 2, Y: -1},
{X: -2, Y: 0},
{X: -2, Y: 1},
{X: -1, Y: 0}, // 12
{X: -1, Y: 1},
{X: -2, Y: 2},
{X: -1, Y: 2},
{X: 0, Y: 0}, // 16
{X: 1, Y: 0},
{X: 0, Y: 1},
{X: 1, Y: 1},
{X: 2, Y: 0}, // 20
{X: 2, Y: 1},
{X: 0, Y: 2},
{X: 1, Y: 2},
{X: 2, Y: 2}, // 24
}
func TestLexFloat(t *testing.T) {
t.Run("test edge case lex float values", func(t *testing.T) {
assert.Equal(t, uint64(0x0010000000000000), LexFloat(-math.MaxFloat64))
@@ -85,70 +114,76 @@ func TestLexFloat(t *testing.T) {
func TestZValue(t *testing.T) {
t.Run("test z-values", func(t *testing.T) {
z := ZValue(types.Point{X: -5000, Y: -5000})
assert.Equal(t, "0fff30f03f3fffffffffffffffffffff", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0x0fff30f03f3fffff, 0xffffffffffffffff}, z)
z = ZValue(types.Point{X: -1, Y: -1})
assert.Equal(t, "300000ffffffffffffffffffffffffff", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0x300000ffffffffff, 0xffffffffffffffff}, z)
z = ZValue(types.Point{X: -1, Y: 0})
assert.Equal(t, "600000aaaaaaaaaaaaaaaaaaaaaaaaaa", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0x9000005555555555, 0x5555555555555555}, z)
z = ZValue(types.Point{X: -1, Y: 1})
assert.Equal(t, "655555aaaaaaaaaaaaaaaaaaaaaaaaaa", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0x9aaaaa5555555555, 0x5555555555555555}, z)
z = ZValue(types.Point{X: 0, Y: -1})
assert.Equal(t, "90000055555555555555555555555555", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0x600000aaaaaaaaaa, 0xaaaaaaaaaaaaaaaa}, z)
z = ZValue(types.Point{X: 1, Y: -1})
assert.Equal(t, "9aaaaa55555555555555555555555555", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0x655555aaaaaaaaaa, 0xaaaaaaaaaaaaaaaa}, z)
z = ZValue(types.Point{X: 0, Y: 0})
assert.Equal(t, "c0000000000000000000000000000000", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0xc000000000000000, 0x000000000000000}, z)
z = ZValue(types.Point{X: 1, Y: 0})
assert.Equal(t, "caaaaa00000000000000000000000000", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0xc555550000000000, 0x000000000000000}, z)
z = ZValue(types.Point{X: 0, Y: 1})
assert.Equal(t, "c5555500000000000000000000000000", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0xcaaaaa0000000000, 0x000000000000000}, z)
z = ZValue(types.Point{X: 1, Y: 1})
assert.Equal(t, "cfffff00000000000000000000000000", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0xcfffff0000000000, 0x000000000000000}, z)
z = ZValue(types.Point{X: 2, Y: 2})
assert.Equal(t, "f0000000000000000000000000000000", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0xf000000000000000, 0x000000000000000}, z)
z = ZValue(types.Point{X: 50000, Y: 50000})
assert.Equal(t, "f000fcc03ccc00000000000000000000", hex.EncodeToString(z[:]))
assert.Equal(t, [2]uint64{0xf000fcc03ccc0000, 0x000000000000000}, z)
})
t.Run("test un-z-values", func(t *testing.T) {
v, _ := hex.DecodeString("c0000000000000000000000000000000")
z := [16]byte{}
for i, v := range v {
z[i] = v
}
z := [2]uint64{0xc000000000000000, 0x000000000000000}
assert.Equal(t, types.Point{X: 0, Y: 0}, UnZValue(z))
v, _ = hex.DecodeString("daaaaa00000000000000000000000000")
z = [16]byte{}
for i, v := range v {
z[i] = v
}
assert.Equal(t, types.Point{X: 1, Y: 2}, UnZValue(z))
z = [2]uint64{0xdaaaaa0000000000, 0x000000000000000}
assert.Equal(t, types.Point{X: 2, Y: 1}, UnZValue(z))
})
t.Run("test sorting points by z-value", func(t *testing.T) {
sortedPoints := []types.Point{
{X: -5000, Y: -5000},
{X: -2, Y: -2},
{X: -1, Y: -2},
{X: -2, Y: -1},
{X: -1, Y: -1},
{X: 0, Y: -2},
{X: 1, Y: -2},
{X: 2, Y: -2},
{X: 0, Y: -1},
{X: 1, Y: -1},
{X: 2, Y: -1},
{X: -2, Y: 0},
{X: -2, Y: 1},
{X: -1, Y: 0},
{X: -1, Y: 1},
{X: 1, Y: -1},
{X: -2, Y: 2},
{X: -1, Y: 2},
{X: 0, Y: 0},
{X: 1, Y: 0},
{X: 0, Y: 1},
{X: 1, Y: 1},
{X: 2, Y: 0},
{X: 2, Y: 1},
{X: 0, Y: 2},
{X: 1, Y: 2},
{X: 2, Y: 2},
{X: 100, Y: 100},
}
randPoints := append([]types.Point{}, sortedPoints...)
rand.Shuffle(len(randPoints), func(i, j int) {
@@ -162,7 +197,7 @@ func TestZAddr(t *testing.T) {
t.Run("test points z-addrs", func(t *testing.T) {
p := types.Point{X: 1, Y: 2}
res := ZAddr(p)
assert.Equal(t, "daaaaa0000000000000000000000000000", hex.EncodeToString(res[:]))
assert.Equal(t, "60e5555500000000000000000000000000", hex.EncodeToString(res[:]))
})
t.Run("test linestring z-addrs", func(t *testing.T) {
@@ -171,7 +206,7 @@ func TestZAddr(t *testing.T) {
c := types.Point{X: 3, Y: 3}
l := types.LineString{Points: []types.Point{a, b, c}}
res := ZAddr(l)
assert.Equal(t, "cfffff0000000000000000000000000007", hex.EncodeToString(res[:]))
assert.Equal(t, "3fcfffff00000000000000000000000000", hex.EncodeToString(res[:]))
})
t.Run("test polygon z-addrs", func(t *testing.T) {
@@ -182,6 +217,54 @@ func TestZAddr(t *testing.T) {
l := types.LineString{Points: []types.Point{a, b, c, d, a}}
p := types.Polygon{Lines: []types.LineString{l}}
res := ZAddr(p)
assert.Equal(t, "300000ffffffffffffffffffffffffff07", hex.EncodeToString(res[:]))
assert.Equal(t, "40300000ffffffffffffffffffffffffff", hex.EncodeToString(res[:]))
})
}
// TestZSort checks the z-addresses produced by ZAddr for several line strings
// built from ps, and that ZAddrSort orders geometries by those addresses.
func TestZSort(t *testing.T) {
	p1 := types.LineString{Points: []types.Point{ps[16], ps[19]}}
	p2 := types.LineString{Points: []types.Point{ps[0], ps[3]}}
	p3 := types.LineString{Points: []types.Point{ps[19], ps[24]}}
	p4 := types.LineString{Points: []types.Point{ps[3], ps[16]}}
	p5 := types.LineString{Points: []types.Point{ps[3], ps[19]}}
	p6 := types.LineString{Points: []types.Point{ps[24], ps[24]}}

	addrTests := []struct {
		name string
		geom types.GeometryValue
		want string
	}{
		{name: "p1", geom: p1, want: "3ec0000000000000000000000000000000"}, // bbox: (0, 0), (1, 1)
		{name: "p2", geom: p2, want: "3f0fffffffffffffffffffffffffffffff"}, // bbox: (-2, -2), (-1, -1)
		{name: "p3", geom: p3, want: "3fcfffff00000000000000000000000000"}, // bbox: (1, 1), (2, 2)
		{name: "p4", geom: p4, want: "40300000ffffffffffffffffffffffffff"}, // bbox: (-1, -1), (0, 0)
		{name: "p5", geom: p5, want: "40300000ffffffffffffffffffffffffff"}, // bbox: (-1, -1), (1, 1)
		{name: "p6", geom: p6, want: "60f0000000000000000000000000000000"}, // bbox: (2, 2), (2, 2)
	}
	for _, tc := range addrTests {
		tc := tc
		t.Run("test z-addr "+tc.name, func(t *testing.T) {
			z := ZAddr(tc.geom)
			assert.Equal(t, tc.want, hex.EncodeToString(z[:]))
		})
	}

	t.Run("test z-addr sorting", func(t *testing.T) {
		sortedGeoms := []types.GeometryValue{p1, p2, p3, p4, p5}
		randomGeoms := append([]types.GeometryValue{}, sortedGeoms...)
		rand.Shuffle(len(randomGeoms), func(i, j int) {
			randomGeoms[i], randomGeoms[j] = randomGeoms[j], randomGeoms[i]
		})
		got := ZAddrSort(randomGeoms)

		// p4 and p5 share the same z-address and sort.Slice is not guaranteed
		// to be stable, so their relative order after sorting depends on the
		// shuffle. Compare the sequence of addresses instead of the geometries
		// themselves, and separately check that no geometry was lost.
		assert.ElementsMatch(t, sortedGeoms, got)
		assert.Equal(t, len(sortedGeoms), len(got))
		for i := range sortedGeoms {
			assert.Equal(t, ZAddr(sortedGeoms[i]), ZAddr(got[i]))
		}
	})
}