added GetManyFields method to tuple

2026-02-12 18:59:03 -06:00 · 2022-02-18 15:35:12 -08:00
parent b0c12ba391
commit f1afe89743
2 changed files with 122 additions and 32 deletions
--- a/go/store/val/tuple.go
+++ b/go/store/val/tuple.go
@@ -24,7 +24,7 @@ const (
 	MaxTupleFields            = 4096
 	MaxTupleDataSize ByteSize = math.MaxUint16

-	numFieldsSize ByteSize = 2
+	countSize ByteSize = 2
 )

 // todo(andy): update comment
@@ -119,7 +119,7 @@ func CloneTuple(pool pool.BuffPool, tup Tuple) Tuple {

 func allocateTuple(pool pool.BuffPool, bufSz ByteSize, fields int) (tup Tuple, offs offsets) {
 	offSz := offsetsSize(fields)
-	tup = pool.Get(uint64(bufSz + offSz + numFieldsSize))
+	tup = pool.Get(uint64(bufSz + offSz + countSize))

 	writeFieldCount(tup, fields)
 	offs = offsets(tup[bufSz : bufSz+offSz])
@@ -129,35 +129,81 @@ func allocateTuple(pool pool.BuffPool, bufSz ByteSize, fields int) (tup Tuple, o

 // GetField returns the value for field |i|.
 func (tup Tuple) GetField(i int) (field []byte) {
-	sz := tup.size()
 	cnt := tup.Count()
+	if i >= cnt {
+		return nil
+	}

-	// slice the offsets array
-	offStop := sz - numFieldsSize
-	bufStop := offStop - offsetsSize(cnt)
+	sz := ByteSize(len(tup))
+	split := sz - uint16Size*ByteSize(cnt)

 	sb := SlicedBuffer{
-		Buf:  tup[:bufStop],
-		Offs: offsets(tup[bufStop:offStop]),
+		Buf:  tup[:split],
+		Offs: offsets(tup[split : sz-countSize]),
 	}
+
 	field = sb.GetSlice(i)

 	if len(field) == 0 {
-		field = nil // NULL
+		return nil // NULL
 	}
 	return
 }

-func (tup Tuple) size() ByteSize {
-	return ByteSize(len(tup))
+// GetManyFields writes the fields named by |indexes| into |slices| (to avoid allocating) and
+// returns |slices|. |indexes| must be non-empty and in ascending order, and |slices| must hold
+// at least len(indexes) entries. Zero-length (NULL) fields are returned as nil.
+func (tup Tuple) GetManyFields(indexes []int, slices [][]byte) [][]byte {
+	cnt := tup.Count()
+
+	sz := ByteSize(len(tup))
+	split := sz - uint16Size*ByteSize(cnt)
+	offs := offsets(tup[split : sz-countSize])
+
+	k, start, stop := int(0), uint16(0), uint16(0)
+
+	// the offsets array has no entry for the first field
+	// (it always starts at byte 0), so handle it separately
+	if indexes[k] == 0 {
+		if cnt == 1 {
+			stop = uint16(split)
+		} else {
+			stop = readUint16(offs[:uint16Size])
+		}
+		slices[0] = tup[:stop]
+		k++
+	}
+
+	// the offsets array has no end offset for the last field
+	// (it ends at |split|, where the offsets begin), so handle it separately
+	last := cnt - 1
+
+	for ; k < len(indexes) && indexes[k] < last; k++ {
+		i := indexes[k]
+		start = readUint16(offs[(i-1)*2 : i*2])
+		stop = readUint16(offs[i*2 : (i+1)*2])
+		slices[k] = tup[start:stop]
+	}
+
+	if k < len(indexes) && indexes[k] == last {
+		os := ByteSize(len(offs))
+		start = readUint16(offs[os-uint16Size:])
+		stop = uint16(split)
+		slices[k] = tup[start:stop]
+	}
+
+	// set NULL values
+	for i, s := range slices {
+		if len(s) == 0 {
+			slices[i] = nil
+		}
+	}
+
+	return slices
 }

 func (tup Tuple) Count() int {
-	return tup.fieldCount()
-}
-
-func (tup Tuple) fieldCount() int {
-	sl := tup[tup.size()-numFieldsSize:]
+	sl := tup[len(tup)-int(countSize):]
 	return int(readUint16(sl))
 }

@@ -170,6 +216,6 @@ func sizeOf(val []byte) ByteSize {
 }

 func writeFieldCount(tup Tuple, count int) {
-	sl := tup[len(tup)-int(numFieldsSize):]
+	sl := tup[len(tup)-int(countSize):]
 	writeUint16(sl, uint16(count))
 }
--- a/go/store/val/tuple_test.go
+++ b/go/store/val/tuple_test.go
@@ -16,6 +16,7 @@ package val

 import (
 	"math/rand"
+	"sort"
 	"testing"

 	"github.com/stretchr/testify/assert"
@@ -25,32 +26,75 @@ import (

 var testPool = pool.NewBuffPool()

+// todo(andy): randomize test seed
+var testRand = rand.New(rand.NewSource(1))
+
 func TestNewTuple(t *testing.T) {
 	t.Run("test tuple round trip", func(t *testing.T) {
 		roundTripBytes(t)
 	})
+	t.Run("test tuple get many", func(t *testing.T) {
+		testTupleGetMany(t)
+	})
 }

 func roundTripBytes(t *testing.T) {
-	randomBytes := func(t *testing.T) (fields [][]byte) {
-		fields = make([][]byte, (rand.Uint32()%19)+1)
-		assert.True(t, len(fields) > 0)
-		for i := range fields {
-			if rand.Uint32()%4 == 0 {
-				// 25% NULL
-				continue
-			}
-			fields[i] = make([]byte, rand.Uint32()%20)
-			rand.Read(fields[i])
-		}
-		return
-	}
-
 	for n := 0; n < 100; n++ {
-		fields := randomBytes(t)
+		fields := randomByteFields(t)
 		tup := NewTuple(testPool, fields...)
 		for i, field := range fields {
 			assert.Equal(t, field, tup.GetField(i))
 		}
 	}
 }
+
+func testTupleGetMany(t *testing.T) {
+	for n := 0; n < 100; n++ {
+		fields := randomByteFields(t)
+		tup := NewTuple(testPool, fields...)
+
+		indexes := randomFieldIndexes(fields)
+		actual := tup.GetManyFields(indexes, make([][]byte, len(indexes)))
+
+		for k, idx := range indexes {
+			exp := fields[idx]
+			act := actual[k]
+			assert.Equal(t, exp, act)
+		}
+	}
+}
+
+func randomByteFields(t *testing.T) (fields [][]byte) {
+	fields = make([][]byte, rand.Intn(19)+1)
+	assert.True(t, len(fields) > 0)
+	for i := range fields {
+		if rand.Uint32()%4 == 0 {
+			// 25% NULL
+			fields[i] = nil
+			continue
+		}
+		fields[i] = make([]byte, rand.Intn(19)+1)
+		rand.Read(fields[i])
+	}
+	return
+}
+
+func randomFieldIndexes(fields [][]byte) []int {
+	indexes := make([]int, len(fields))
+	for i := range indexes {
+		indexes[i] = i
+	}
+
+	k := testRand.Intn(len(indexes))
+	if k == 0 {
+		k++
+	}
+
+	testRand.Shuffle(len(indexes), func(i, j int) {
+		indexes[i], indexes[j] = indexes[j], indexes[i]
+	})
+	indexes = indexes[:k]
+	sort.Ints(indexes)
+
+	return indexes
+}