increase max skip.List height from 5 to 10, improve height promotion probabilities, cleanup comparator callback types

This commit is contained in:
Andy Arthur
2022-09-07 16:36:55 -07:00
parent f13bedece9
commit b336716fe0
3 changed files with 68 additions and 90 deletions

View File

@@ -133,11 +133,11 @@ func memIterFromRange(list *skip.List, rng Range) *memRangeIter {
}
}
// skipSearchFromRange is a skip.SearchFn used to initialize
// a skip.List iterator for a given Range. The skip.SearchFn
// skipSearchFromRange is a skip.SeekFn used to initialize
// a skip.List iterator for a given Range. The skip.SeekFn
// returns true if the iter being initialized is not yet
// within the bounds of Range |rng|.
func skipSearchFromRange(rng Range) skip.SearchFn {
func skipSearchFromRange(rng Range) skip.SeekFn {
return func(nodeKey []byte) bool {
if nodeKey == nil {
return false

View File

@@ -17,35 +17,28 @@ package skip
import (
"math"
"math/rand"
"github.com/zeebo/xxh3"
)
const (
maxCount = math.MaxUint32 - 1
maxHeight = uint8(5)
highest = maxHeight - 1
maxHeight = 9
maxCount = math.MaxUint32 - 1
sentinelId = nodeId(0)
)
// KeyOrder is the ordering callback a List uses to compare two keys.
// A positive result means |l| sorts after |r|: seek advances past a node
// while keyOrder(searchKey, nodeKey) > 0. The tests in this package pass
// bytes.Compare directly as a KeyOrder.
// NOTE(review): presumably the full bytes.Compare convention applies
// (negative / zero / positive) — confirm against callers.
type KeyOrder func(l, r []byte) (cmp int)
// SeekFn is a callback that steers a seek through a List. It is invoked with
// the key of the node currently under consideration and returns true when the
// seek should advance past that node. The key can be nil: this package treats
// a nil key as the sentinel's key, and skipSearchFromRange returns false for it.
type SeekFn func(key []byte) (advance bool)
type List struct {
nodes []skipNode
count uint32
nodes []skipNode
count uint32
checkpoint nodeId
cmp ValueCmp
salt uint64
keyOrder KeyOrder
}
type ValueCmp func(left, right []byte) int
type SearchFn func(nodeKey []byte) bool
type nodeId uint32
type skipPointer [maxHeight]nodeId
type skipPointer [maxHeight + 1]nodeId
type skipNode struct {
key, val []byte
@@ -56,7 +49,7 @@ type skipNode struct {
height uint8
}
func NewSkipList(cmp ValueCmp) *List {
func NewSkipList(order KeyOrder) *List {
nodes := make([]skipNode, 0, 8)
// initialize sentinel node
@@ -71,8 +64,7 @@ func NewSkipList(cmp ValueCmp) *List {
return &List{
nodes: nodes,
checkpoint: nodeId(1),
cmp: cmp,
salt: rand.Uint64(),
keyOrder: order,
}
}
@@ -123,7 +115,7 @@ func (l *List) Put(key, val []byte) {
if key == nil {
panic("key must be non-nil")
}
if l.Count() >= maxCount {
if len(l.nodes) >= maxCount {
panic("list has no capacity")
}
@@ -147,7 +139,7 @@ func (l *List) pathToKey(key []byte) (path skipPointer) {
next := l.headPointer()
prev := sentinelId
for lvl := int(highest); lvl >= 0; {
for lvl := int(maxHeight); lvl >= 0; {
curr := l.getNode(next[lvl])
// descend if we can't advance at |lvl|
@@ -168,7 +160,7 @@ func (l *List) pathBeforeKey(key []byte) (path skipPointer) {
next := l.headPointer()
prev := sentinelId
for lvl := int(highest); lvl >= 0; {
for lvl := int(maxHeight); lvl >= 0; {
curr := l.getNode(next[lvl])
// descend if we can't advance at |lvl|
@@ -190,7 +182,7 @@ func (l *List) insert(key, value []byte, path skipPointer) {
key: key,
val: value,
id: l.nextNodeId(),
height: rollHeight(key, l.salt),
height: rollHeight(),
}
l.nodes = append(l.nodes, novel)
@@ -255,22 +247,20 @@ func (it *ListIter) Retreat() {
func (l *List) GetIterAt(key []byte) (it *ListIter) {
return l.GetIterFromSearchFn(func(nodeKey []byte) bool {
return l.compareKeysWithFn(key, nodeKey, l.cmp) > 0
return l.compareKeys(key, nodeKey) > 0
})
}
func (l *List) GetIterFromSearchFn(kontinue SearchFn) (it *ListIter) {
func (l *List) GetIterFromSearchFn(fn SeekFn) (it *ListIter) {
it = &ListIter{
curr: l.seekWithSearchFn(kontinue),
curr: l.seekWithFn(fn),
list: l,
}
if it.curr.id == sentinelId {
// try to keep |it| in bounds if |key| is
// greater than the largest key in |l|
it.Retreat()
}
return
}
@@ -290,20 +280,16 @@ func (l *List) IterAtEnd() *ListIter {
// seek returns the skipNode with the smallest key >= |key|.
func (l *List) seek(key []byte) skipNode {
return l.seekWithCompare(key, l.cmp)
}
func (l *List) seekWithCompare(key []byte, cmp ValueCmp) (node skipNode) {
return l.seekWithSearchFn(func(nodeKey []byte) bool {
return l.compareKeysWithFn(key, nodeKey, cmp) > 0
return l.seekWithFn(func(curr []byte) (advance bool) {
return l.compareKeys(key, curr) > 0
})
}
func (l *List) seekWithSearchFn(kontinue SearchFn) (node skipNode) {
func (l *List) seekWithFn(cb SeekFn) (node skipNode) {
ptr := l.headPointer()
for h := int64(highest); h >= 0; h-- {
for h := int64(maxHeight); h >= 0; h-- {
node = l.getNode(ptr[h])
for kontinue(node.key) {
for cb(node.key) {
ptr = node.next
node = l.getNode(ptr[h])
}
@@ -336,43 +322,35 @@ func (l *List) nextNodeId() nodeId {
return nodeId(len(l.nodes))
}
func (l *List) compare(left, right skipNode) int {
return l.compareKeys(left.key, right.key)
}
func (l *List) compareKeys(left, right []byte) int {
return l.compareKeysWithFn(left, right, l.cmp)
}
func (l *List) compareKeysWithFn(left, right []byte, cmp ValueCmp) int {
if right == nil {
return -1 // |right| is sentinel key
}
return cmp(left, right)
return l.keyOrder(left, right)
}
const (
pattern0 = uint64(1<<3 - 1)
pattern1 = uint64(1<<6 - 1)
pattern2 = uint64(1<<9 - 1)
pattern3 = uint64(1<<12 - 1)
var (
// Precompute the skiplist probabilities so that the optimal
// p-value can be used (inverse of Euler's number).
//
// https://github.com/andy-kimball/arenaskl/blob/master/skl.go
probabilities = [maxHeight]uint32{}
randSrc = rand.New(rand.NewSource(rand.Int63()))
)
func rollHeight(key []byte, salt uint64) (h uint8) {
roll := xxh3.HashSeed(key, salt)
patterns := []uint64{
pattern0,
pattern1,
pattern2,
pattern3,
// init fills the package-level probabilities table that rollHeight consults.
// Entry i holds math.MaxUint32 scaled by (1/e)^(i+1), so every entry is a
// smaller threshold than the one before it.
func init() {
// p is the running probability; it is divided by e once per table entry.
p := float64(1.0)
for i := uint8(0); i < maxHeight; i++ {
p /= math.E
// scale into the uint32 range so it can be compared against a raw Uint32 roll
probabilities[i] = uint32(float64(math.MaxUint32) * p)
}
}
for _, pat := range patterns {
if uint64(roll)&pat != pat {
break
}
// rollHeight picks a random height for a newly inserted node. It draws a
// single uint32 from randSrc and counts how many consecutive entries of
// probabilities, starting at index 0, the draw is less than or equal to.
// The result lies in [0, maxHeight].
// NOTE(review): randSrc is a package-level *rand.Rand built with rand.New;
// math/rand documents such sources as not safe for concurrent use — confirm
// that Lists are never inserted into from multiple goroutines at once.
func rollHeight() (h uint8) {
rnd := randSrc.Uint32()
h = 0
// stop at the first threshold the roll exceeds, or once h reaches maxHeight
for h < maxHeight && rnd <= probabilities[h] {
h++
}
return
}

View File

@@ -25,10 +25,10 @@ import (
"github.com/stretchr/testify/assert"
)
// var src = rand.New(rand.NewSource(time.Now().Unix()))
var src = rand.New(rand.NewSource(0))
func TestSkipList(t *testing.T) {
// set constant seed to improve debugging
randSrc = rand.New(rand.NewSource(0))
t.Run("test skip list", func(t *testing.T) {
vals := [][]byte{
b("a"), b("b"), b("c"), b("d"), b("e"),
@@ -39,7 +39,7 @@ func TestSkipList(t *testing.T) {
})
t.Run("test skip list of random bytes", func(t *testing.T) {
vals := randomVals((src.Int63() % 10_000) + 100)
vals := randomVals((randSrc.Int63() % 10_000) + 100)
testSkipList(t, bytes.Compare, vals...)
})
t.Run("test with custom compare function", func(t *testing.T) {
@@ -48,7 +48,7 @@ func TestSkipList(t *testing.T) {
r := int64(binary.LittleEndian.Uint64(right))
return int(l - r)
}
vals := randomInts((src.Int63() % 10_000) + 100)
vals := randomInts((randSrc.Int63() % 10_000) + 100)
testSkipList(t, compare, vals...)
})
}
@@ -64,7 +64,7 @@ func TestSkipListCheckpoints(t *testing.T) {
})
t.Run("test skip list of random bytes", func(t *testing.T) {
vals := randomVals((src.Int63() % 10_000) + 100)
vals := randomVals((randSrc.Int63() % 10_000) + 100)
testSkipListCheckpoints(t, bytes.Compare, vals...)
})
t.Run("test with custom compare function", func(t *testing.T) {
@@ -73,7 +73,7 @@ func TestSkipListCheckpoints(t *testing.T) {
r := int64(binary.LittleEndian.Uint64(right))
return int(l - r)
}
vals := randomInts((src.Int63() % 10_000) + 100)
vals := randomInts((randSrc.Int63() % 10_000) + 100)
testSkipListCheckpoints(t, compare, vals...)
})
}
@@ -81,9 +81,9 @@ func TestSkipListCheckpoints(t *testing.T) {
func TestMemoryFootprint(t *testing.T) {
var sz int
sz = int(unsafe.Sizeof(skipNode{}))
assert.Equal(t, 80, sz)
assert.Equal(t, 104, sz)
sz = int(unsafe.Sizeof(skipPointer{}))
assert.Equal(t, 20, sz)
assert.Equal(t, 40, sz)
}
func BenchmarkList(b *testing.B) {
@@ -205,8 +205,8 @@ func BenchmarkList(b *testing.B) {
})
}
func testSkipList(t *testing.T, compare ValueCmp, vals ...[]byte) {
src.Shuffle(len(vals), func(i, j int) {
func testSkipList(t *testing.T, compare KeyOrder, vals ...[]byte) {
randSrc.Shuffle(len(vals), func(i, j int) {
vals[i], vals[j] = vals[j], vals[i]
})
@@ -244,8 +244,8 @@ func testSkipListPuts(t *testing.T, list *List, vals ...[]byte) {
}
func testSkipListGets(t *testing.T, list *List, vals ...[]byte) {
// get in different order
src.Shuffle(len(vals), func(i, j int) {
// get in different keyOrder
randSrc.Shuffle(len(vals), func(i, j int) {
vals[i], vals[j] = vals[j], vals[i]
})
@@ -268,7 +268,7 @@ func testSkipListUpdates(t *testing.T, list *List, vals ...[]byte) {
}
assert.Equal(t, len(vals), list.Count())
src.Shuffle(len(vals), func(i, j int) {
randSrc.Shuffle(len(vals), func(i, j int) {
vals[i], vals[j] = vals[j], vals[i]
})
for _, exp := range vals {
@@ -282,7 +282,7 @@ func testSkipListUpdates(t *testing.T, list *List, vals ...[]byte) {
}
func testSkipListIterForward(t *testing.T, list *List, vals ...[]byte) {
// put |vals| back in order
// put |vals| back in keyOrder
sort.Slice(vals, func(i, j int) bool {
return list.compareKeys(vals[i], vals[j]) < 0
})
@@ -297,7 +297,7 @@ func testSkipListIterForward(t *testing.T, list *List, vals ...[]byte) {
// test iter at
for k := 0; k < 10; k++ {
idx = src.Int() % len(vals)
idx = randSrc.Int() % len(vals)
key := vals[idx]
act := validateIterForwardFrom(t, list, key)
exp := len(vals) - idx
@@ -311,14 +311,14 @@ func testSkipListIterForward(t *testing.T, list *List, vals ...[]byte) {
}
func testSkipListIterBackward(t *testing.T, list *List, vals ...[]byte) {
// put |vals| back in order
// put |vals| back in keyOrder
sort.Slice(vals, func(i, j int) bool {
return list.compareKeys(vals[i], vals[j]) < 0
})
// test iter at
for k := 0; k < 10; k++ {
idx := src.Int() % len(vals)
idx := randSrc.Int() % len(vals)
key := vals[idx]
act := validateIterBackwardFrom(t, list, key)
assert.Equal(t, idx+1, act)
@@ -395,8 +395,8 @@ func validateIterBackwardFrom(t *testing.T, l *List, key []byte) (count int) {
func randomVals(cnt int64) (vals [][]byte) {
vals = make([][]byte, cnt)
for i := range vals {
bb := make([]byte, (src.Int63()%91)+10)
src.Read(bb)
bb := make([]byte, (randSrc.Int63()%91)+10)
randSrc.Read(bb)
vals[i] = bb
}
return
@@ -406,7 +406,7 @@ func randomInts(cnt int64) (vals [][]byte) {
vals = make([][]byte, cnt)
for i := range vals {
vals[i] = make([]byte, 8)
v := uint64(src.Int63())
v := uint64(randSrc.Int63())
binary.LittleEndian.PutUint64(vals[i], v)
}
return
@@ -436,8 +436,8 @@ func iterAllBackwards(l *List, cb func([]byte, []byte)) {
}
}
func testSkipListCheckpoints(t *testing.T, compare ValueCmp, data ...[]byte) {
src.Shuffle(len(data), func(i, j int) {
func testSkipListCheckpoints(t *testing.T, compare KeyOrder, data ...[]byte) {
randSrc.Shuffle(len(data), func(i, j int) {
data[i], data[j] = data[j], data[i]
})