mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-25 18:49:36 -06:00
Increase max skip.List height from 5 to 10, improve height promotion probabilities, and clean up comparator callback types
This commit is contained in:
@@ -133,11 +133,11 @@ func memIterFromRange(list *skip.List, rng Range) *memRangeIter {
|
||||
}
|
||||
}
|
||||
|
||||
// skipSearchFromRange is a skip.SearchFn used to initialize
|
||||
// a skip.List iterator for a given Range. The skip.SearchFn
|
||||
// skipSearchFromRange is a skip.SeekFn used to initialize
|
||||
// a skip.List iterator for a given Range. The skip.SeekFn
|
||||
// returns true if the iter being initialized is not yet
|
||||
// within the bounds of Range |rng|.
|
||||
func skipSearchFromRange(rng Range) skip.SearchFn {
|
||||
func skipSearchFromRange(rng Range) skip.SeekFn {
|
||||
return func(nodeKey []byte) bool {
|
||||
if nodeKey == nil {
|
||||
return false
|
||||
|
||||
@@ -17,35 +17,28 @@ package skip
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
|
||||
"github.com/zeebo/xxh3"
|
||||
)
|
||||
|
||||
const (
|
||||
maxCount = math.MaxUint32 - 1
|
||||
|
||||
maxHeight = uint8(5)
|
||||
highest = maxHeight - 1
|
||||
|
||||
maxHeight = 9
|
||||
maxCount = math.MaxUint32 - 1
|
||||
sentinelId = nodeId(0)
|
||||
)
|
||||
|
||||
// KeyOrder compares two keys and reports their relative order as an int.
// Callers in this package treat a result > 0 as |l| sorting after |r| and a
// result < 0 as |l| sorting before |r|.
type KeyOrder func(l, r []byte) (cmp int)
|
||||
|
||||
// SeekFn is a callback used while seeking through a List. It is handed the
// key of the node currently under consideration and returns true when the
// seek should advance past that node. The key may be nil, which is how the
// sentinel node's key appears to callbacks.
type SeekFn func(key []byte) (advance bool)
|
||||
|
||||
type List struct {
|
||||
nodes []skipNode
|
||||
count uint32
|
||||
|
||||
nodes []skipNode
|
||||
count uint32
|
||||
checkpoint nodeId
|
||||
cmp ValueCmp
|
||||
salt uint64
|
||||
keyOrder KeyOrder
|
||||
}
|
||||
|
||||
type ValueCmp func(left, right []byte) int
|
||||
|
||||
type SearchFn func(nodeKey []byte) bool
|
||||
|
||||
type nodeId uint32
|
||||
|
||||
type skipPointer [maxHeight]nodeId
|
||||
type skipPointer [maxHeight + 1]nodeId
|
||||
|
||||
type skipNode struct {
|
||||
key, val []byte
|
||||
@@ -56,7 +49,7 @@ type skipNode struct {
|
||||
height uint8
|
||||
}
|
||||
|
||||
func NewSkipList(cmp ValueCmp) *List {
|
||||
func NewSkipList(order KeyOrder) *List {
|
||||
nodes := make([]skipNode, 0, 8)
|
||||
|
||||
// initialize sentinel node
|
||||
@@ -71,8 +64,7 @@ func NewSkipList(cmp ValueCmp) *List {
|
||||
return &List{
|
||||
nodes: nodes,
|
||||
checkpoint: nodeId(1),
|
||||
cmp: cmp,
|
||||
salt: rand.Uint64(),
|
||||
keyOrder: order,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -123,7 +115,7 @@ func (l *List) Put(key, val []byte) {
|
||||
if key == nil {
|
||||
panic("key must be non-nil")
|
||||
}
|
||||
if l.Count() >= maxCount {
|
||||
if len(l.nodes) >= maxCount {
|
||||
panic("list has no capacity")
|
||||
}
|
||||
|
||||
@@ -147,7 +139,7 @@ func (l *List) pathToKey(key []byte) (path skipPointer) {
|
||||
next := l.headPointer()
|
||||
prev := sentinelId
|
||||
|
||||
for lvl := int(highest); lvl >= 0; {
|
||||
for lvl := int(maxHeight); lvl >= 0; {
|
||||
curr := l.getNode(next[lvl])
|
||||
|
||||
// descend if we can't advance at |lvl|
|
||||
@@ -168,7 +160,7 @@ func (l *List) pathBeforeKey(key []byte) (path skipPointer) {
|
||||
next := l.headPointer()
|
||||
prev := sentinelId
|
||||
|
||||
for lvl := int(highest); lvl >= 0; {
|
||||
for lvl := int(maxHeight); lvl >= 0; {
|
||||
curr := l.getNode(next[lvl])
|
||||
|
||||
// descend if we can't advance at |lvl|
|
||||
@@ -190,7 +182,7 @@ func (l *List) insert(key, value []byte, path skipPointer) {
|
||||
key: key,
|
||||
val: value,
|
||||
id: l.nextNodeId(),
|
||||
height: rollHeight(key, l.salt),
|
||||
height: rollHeight(),
|
||||
}
|
||||
l.nodes = append(l.nodes, novel)
|
||||
|
||||
@@ -255,22 +247,20 @@ func (it *ListIter) Retreat() {
|
||||
|
||||
func (l *List) GetIterAt(key []byte) (it *ListIter) {
|
||||
return l.GetIterFromSearchFn(func(nodeKey []byte) bool {
|
||||
return l.compareKeysWithFn(key, nodeKey, l.cmp) > 0
|
||||
return l.compareKeys(key, nodeKey) > 0
|
||||
})
|
||||
}
|
||||
|
||||
func (l *List) GetIterFromSearchFn(kontinue SearchFn) (it *ListIter) {
|
||||
func (l *List) GetIterFromSearchFn(fn SeekFn) (it *ListIter) {
|
||||
it = &ListIter{
|
||||
curr: l.seekWithSearchFn(kontinue),
|
||||
curr: l.seekWithFn(fn),
|
||||
list: l,
|
||||
}
|
||||
|
||||
if it.curr.id == sentinelId {
|
||||
// try to keep |it| in bounds if |key| is
|
||||
// greater than the largest key in |l|
|
||||
it.Retreat()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -290,20 +280,16 @@ func (l *List) IterAtEnd() *ListIter {
|
||||
|
||||
// seek returns the skipNode with the smallest key >= |key|.
|
||||
func (l *List) seek(key []byte) skipNode {
|
||||
return l.seekWithCompare(key, l.cmp)
|
||||
}
|
||||
|
||||
func (l *List) seekWithCompare(key []byte, cmp ValueCmp) (node skipNode) {
|
||||
return l.seekWithSearchFn(func(nodeKey []byte) bool {
|
||||
return l.compareKeysWithFn(key, nodeKey, cmp) > 0
|
||||
return l.seekWithFn(func(curr []byte) (advance bool) {
|
||||
return l.compareKeys(key, curr) > 0
|
||||
})
|
||||
}
|
||||
|
||||
func (l *List) seekWithSearchFn(kontinue SearchFn) (node skipNode) {
|
||||
func (l *List) seekWithFn(cb SeekFn) (node skipNode) {
|
||||
ptr := l.headPointer()
|
||||
for h := int64(highest); h >= 0; h-- {
|
||||
for h := int64(maxHeight); h >= 0; h-- {
|
||||
node = l.getNode(ptr[h])
|
||||
for kontinue(node.key) {
|
||||
for cb(node.key) {
|
||||
ptr = node.next
|
||||
node = l.getNode(ptr[h])
|
||||
}
|
||||
@@ -336,43 +322,35 @@ func (l *List) nextNodeId() nodeId {
|
||||
return nodeId(len(l.nodes))
|
||||
}
|
||||
|
||||
func (l *List) compare(left, right skipNode) int {
|
||||
return l.compareKeys(left.key, right.key)
|
||||
}
|
||||
|
||||
func (l *List) compareKeys(left, right []byte) int {
|
||||
return l.compareKeysWithFn(left, right, l.cmp)
|
||||
}
|
||||
|
||||
func (l *List) compareKeysWithFn(left, right []byte, cmp ValueCmp) int {
|
||||
if right == nil {
|
||||
return -1 // |right| is sentinel key
|
||||
}
|
||||
return cmp(left, right)
|
||||
return l.keyOrder(left, right)
|
||||
}
|
||||
|
||||
const (
|
||||
pattern0 = uint64(1<<3 - 1)
|
||||
pattern1 = uint64(1<<6 - 1)
|
||||
pattern2 = uint64(1<<9 - 1)
|
||||
pattern3 = uint64(1<<12 - 1)
|
||||
var (
|
||||
// Precompute the skiplist probabilities so that the optimal
|
||||
// p-value can be used (inverse of Euler's number).
|
||||
//
|
||||
// https://github.com/andy-kimball/arenaskl/blob/master/skl.go
|
||||
probabilities = [maxHeight]uint32{}
|
||||
randSrc = rand.New(rand.NewSource(rand.Int63()))
|
||||
)
|
||||
|
||||
func rollHeight(key []byte, salt uint64) (h uint8) {
|
||||
roll := xxh3.HashSeed(key, salt)
|
||||
patterns := []uint64{
|
||||
pattern0,
|
||||
pattern1,
|
||||
pattern2,
|
||||
pattern3,
|
||||
// init fills the probabilities table. Entry i holds math.MaxUint32 scaled by
// (1/e)^(i+1); rollHeight compares a random uint32 against these thresholds.
func init() {
|
||||
p := float64(1.0)
|
||||
for i := uint8(0); i < maxHeight; i++ {
|
||||
p /= math.E
|
||||
probabilities[i] = uint32(float64(math.MaxUint32) * p)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pat := range patterns {
|
||||
if uint64(roll)&pat != pat {
|
||||
break
|
||||
}
|
||||
// rollHeight picks the height for a new node. It draws a single uint32 from
// randSrc and increments |h| while |h| is below maxHeight and the draw is
// <= probabilities[h], so the result lies in [0, maxHeight].
func rollHeight() (h uint8) {
|
||||
rnd := randSrc.Uint32()
|
||||
h = 0
|
||||
for h < maxHeight && rnd <= probabilities[h] {
|
||||
h++
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -25,10 +25,10 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// var src = rand.New(rand.NewSource(time.Now().Unix()))
|
||||
var src = rand.New(rand.NewSource(0))
|
||||
|
||||
func TestSkipList(t *testing.T) {
|
||||
// set constant seed to improve debugging
|
||||
randSrc = rand.New(rand.NewSource(0))
|
||||
|
||||
t.Run("test skip list", func(t *testing.T) {
|
||||
vals := [][]byte{
|
||||
b("a"), b("b"), b("c"), b("d"), b("e"),
|
||||
@@ -39,7 +39,7 @@ func TestSkipList(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("test skip list of random bytes", func(t *testing.T) {
|
||||
vals := randomVals((src.Int63() % 10_000) + 100)
|
||||
vals := randomVals((randSrc.Int63() % 10_000) + 100)
|
||||
testSkipList(t, bytes.Compare, vals...)
|
||||
})
|
||||
t.Run("test with custom compare function", func(t *testing.T) {
|
||||
@@ -48,7 +48,7 @@ func TestSkipList(t *testing.T) {
|
||||
r := int64(binary.LittleEndian.Uint64(right))
|
||||
return int(l - r)
|
||||
}
|
||||
vals := randomInts((src.Int63() % 10_000) + 100)
|
||||
vals := randomInts((randSrc.Int63() % 10_000) + 100)
|
||||
testSkipList(t, compare, vals...)
|
||||
})
|
||||
}
|
||||
@@ -64,7 +64,7 @@ func TestSkipListCheckpoints(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("test skip list of random bytes", func(t *testing.T) {
|
||||
vals := randomVals((src.Int63() % 10_000) + 100)
|
||||
vals := randomVals((randSrc.Int63() % 10_000) + 100)
|
||||
testSkipListCheckpoints(t, bytes.Compare, vals...)
|
||||
})
|
||||
t.Run("test with custom compare function", func(t *testing.T) {
|
||||
@@ -73,7 +73,7 @@ func TestSkipListCheckpoints(t *testing.T) {
|
||||
r := int64(binary.LittleEndian.Uint64(right))
|
||||
return int(l - r)
|
||||
}
|
||||
vals := randomInts((src.Int63() % 10_000) + 100)
|
||||
vals := randomInts((randSrc.Int63() % 10_000) + 100)
|
||||
testSkipListCheckpoints(t, compare, vals...)
|
||||
})
|
||||
}
|
||||
@@ -81,9 +81,9 @@ func TestSkipListCheckpoints(t *testing.T) {
|
||||
func TestMemoryFootprint(t *testing.T) {
|
||||
var sz int
|
||||
sz = int(unsafe.Sizeof(skipNode{}))
|
||||
assert.Equal(t, 80, sz)
|
||||
assert.Equal(t, 104, sz)
|
||||
sz = int(unsafe.Sizeof(skipPointer{}))
|
||||
assert.Equal(t, 20, sz)
|
||||
assert.Equal(t, 40, sz)
|
||||
}
|
||||
|
||||
func BenchmarkList(b *testing.B) {
|
||||
@@ -205,8 +205,8 @@ func BenchmarkList(b *testing.B) {
|
||||
})
|
||||
}
|
||||
|
||||
func testSkipList(t *testing.T, compare ValueCmp, vals ...[]byte) {
|
||||
src.Shuffle(len(vals), func(i, j int) {
|
||||
func testSkipList(t *testing.T, compare KeyOrder, vals ...[]byte) {
|
||||
randSrc.Shuffle(len(vals), func(i, j int) {
|
||||
vals[i], vals[j] = vals[j], vals[i]
|
||||
})
|
||||
|
||||
@@ -244,8 +244,8 @@ func testSkipListPuts(t *testing.T, list *List, vals ...[]byte) {
|
||||
}
|
||||
|
||||
func testSkipListGets(t *testing.T, list *List, vals ...[]byte) {
|
||||
// get in different order
|
||||
src.Shuffle(len(vals), func(i, j int) {
|
||||
// get in a different order
|
||||
randSrc.Shuffle(len(vals), func(i, j int) {
|
||||
vals[i], vals[j] = vals[j], vals[i]
|
||||
})
|
||||
|
||||
@@ -268,7 +268,7 @@ func testSkipListUpdates(t *testing.T, list *List, vals ...[]byte) {
|
||||
}
|
||||
assert.Equal(t, len(vals), list.Count())
|
||||
|
||||
src.Shuffle(len(vals), func(i, j int) {
|
||||
randSrc.Shuffle(len(vals), func(i, j int) {
|
||||
vals[i], vals[j] = vals[j], vals[i]
|
||||
})
|
||||
for _, exp := range vals {
|
||||
@@ -282,7 +282,7 @@ func testSkipListUpdates(t *testing.T, list *List, vals ...[]byte) {
|
||||
}
|
||||
|
||||
func testSkipListIterForward(t *testing.T, list *List, vals ...[]byte) {
|
||||
// put |vals| back in order
|
||||
// put |vals| back in key order
|
||||
sort.Slice(vals, func(i, j int) bool {
|
||||
return list.compareKeys(vals[i], vals[j]) < 0
|
||||
})
|
||||
@@ -297,7 +297,7 @@ func testSkipListIterForward(t *testing.T, list *List, vals ...[]byte) {
|
||||
|
||||
// test iter at
|
||||
for k := 0; k < 10; k++ {
|
||||
idx = src.Int() % len(vals)
|
||||
idx = randSrc.Int() % len(vals)
|
||||
key := vals[idx]
|
||||
act := validateIterForwardFrom(t, list, key)
|
||||
exp := len(vals) - idx
|
||||
@@ -311,14 +311,14 @@ func testSkipListIterForward(t *testing.T, list *List, vals ...[]byte) {
|
||||
}
|
||||
|
||||
func testSkipListIterBackward(t *testing.T, list *List, vals ...[]byte) {
|
||||
// put |vals| back in order
|
||||
// put |vals| back in key order
|
||||
sort.Slice(vals, func(i, j int) bool {
|
||||
return list.compareKeys(vals[i], vals[j]) < 0
|
||||
})
|
||||
|
||||
// test iter at
|
||||
for k := 0; k < 10; k++ {
|
||||
idx := src.Int() % len(vals)
|
||||
idx := randSrc.Int() % len(vals)
|
||||
key := vals[idx]
|
||||
act := validateIterBackwardFrom(t, list, key)
|
||||
assert.Equal(t, idx+1, act)
|
||||
@@ -395,8 +395,8 @@ func validateIterBackwardFrom(t *testing.T, l *List, key []byte) (count int) {
|
||||
func randomVals(cnt int64) (vals [][]byte) {
|
||||
vals = make([][]byte, cnt)
|
||||
for i := range vals {
|
||||
bb := make([]byte, (src.Int63()%91)+10)
|
||||
src.Read(bb)
|
||||
bb := make([]byte, (randSrc.Int63()%91)+10)
|
||||
randSrc.Read(bb)
|
||||
vals[i] = bb
|
||||
}
|
||||
return
|
||||
@@ -406,7 +406,7 @@ func randomInts(cnt int64) (vals [][]byte) {
|
||||
vals = make([][]byte, cnt)
|
||||
for i := range vals {
|
||||
vals[i] = make([]byte, 8)
|
||||
v := uint64(src.Int63())
|
||||
v := uint64(randSrc.Int63())
|
||||
binary.LittleEndian.PutUint64(vals[i], v)
|
||||
}
|
||||
return
|
||||
@@ -436,8 +436,8 @@ func iterAllBackwards(l *List, cb func([]byte, []byte)) {
|
||||
}
|
||||
}
|
||||
|
||||
func testSkipListCheckpoints(t *testing.T, compare ValueCmp, data ...[]byte) {
|
||||
src.Shuffle(len(data), func(i, j int) {
|
||||
func testSkipListCheckpoints(t *testing.T, compare KeyOrder, data ...[]byte) {
|
||||
randSrc.Shuffle(len(data), func(i, j int) {
|
||||
data[i], data[j] = data[j], data[i]
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user