From f1afe8974380debb140e524936bd980de0682a59 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Fri, 18 Feb 2022 15:35:12 -0800 Subject: [PATCH] added GetManyFields method to tuple --- go/store/val/tuple.go | 80 ++++++++++++++++++++++++++++++-------- go/store/val/tuple_test.go | 74 ++++++++++++++++++++++++++++------- 2 files changed, 122 insertions(+), 32 deletions(-) diff --git a/go/store/val/tuple.go b/go/store/val/tuple.go index 55ef596371..20d63cb0e6 100644 --- a/go/store/val/tuple.go +++ b/go/store/val/tuple.go @@ -24,7 +24,7 @@ const ( MaxTupleFields = 4096 MaxTupleDataSize ByteSize = math.MaxUint16 - numFieldsSize ByteSize = 2 + countSize ByteSize = 2 ) // todo(andy): update comment @@ -119,7 +119,7 @@ func CloneTuple(pool pool.BuffPool, tup Tuple) Tuple { func allocateTuple(pool pool.BuffPool, bufSz ByteSize, fields int) (tup Tuple, offs offsets) { offSz := offsetsSize(fields) - tup = pool.Get(uint64(bufSz + offSz + numFieldsSize)) + tup = pool.Get(uint64(bufSz + offSz + countSize)) writeFieldCount(tup, fields) offs = offsets(tup[bufSz : bufSz+offSz]) @@ -129,35 +129,81 @@ func allocateTuple(pool pool.BuffPool, bufSz ByteSize, fields int) (tup Tuple, o // GetField returns the value for field |i|. func (tup Tuple) GetField(i int) (field []byte) { - sz := tup.size() cnt := tup.Count() + if i >= cnt { + return nil + } - // slice the offsets array - offStop := sz - numFieldsSize - bufStop := offStop - offsetsSize(cnt) + sz := ByteSize(len(tup)) + split := sz - uint16Size*ByteSize(cnt) sb := SlicedBuffer{ - Buf: tup[:bufStop], - Offs: offsets(tup[bufStop:offStop]), + Buf: tup[:split], + Offs: offsets(tup[split : sz-countSize]), } + field = sb.GetSlice(i) if len(field) == 0 { - field = nil // NULL + return nil // NULL } return } -func (tup Tuple) size() ByteSize { - return ByteSize(len(tup)) +// GetManyFields returns the fields specified in |indexes|. It assumes +// field indexes are provided in ascending order. It populates field data +// into |slices| to avoid allocating. +func (tup Tuple) GetManyFields(indexes []int, slices [][]byte) [][]byte { + cnt := tup.Count() + + sz := ByteSize(len(tup)) + split := sz - uint16Size*ByteSize(cnt) + offs := offsets(tup[split : sz-countSize]) + + k, start, stop := int(0), uint16(0), uint16(0) + + // we don't have an explicit "start" for the + // first field, handle it separately + if indexes[k] == 0 { + if cnt == 1 { + stop = uint16(split) + } else { + stop = readUint16(offs[:uint16Size]) + } + slices[0] = tup[:stop] + k++ + } + + // we don't have an explicit "stop" for the + // last field, handle it separately + last := cnt - 1 + + for ; k < len(indexes) && indexes[k] < last; k++ { + i := indexes[k] + start = readUint16(offs[(i-1)*2 : i*2]) + stop = readUint16(offs[i*2 : (i+1)*2]) + slices[k] = tup[start:stop] + } + + if k < len(indexes) && indexes[k] == last { + os := ByteSize(len(offs)) + start = readUint16(offs[os-uint16Size:]) + stop = uint16(split) + slices[k] = tup[start:stop] + } + + // set NULL values + for i, s := range slices { + if len(s) == 0 { + slices[i] = nil + } + } + + return slices } func (tup Tuple) Count() int { - return tup.fieldCount() -} - -func (tup Tuple) fieldCount() int { - sl := tup[tup.size()-numFieldsSize:] + sl := tup[len(tup)-int(countSize):] return int(readUint16(sl)) } @@ -170,6 +216,6 @@ func sizeOf(val []byte) ByteSize { } func writeFieldCount(tup Tuple, count int) { - sl := tup[len(tup)-int(numFieldsSize):] + sl := tup[len(tup)-int(countSize):] writeUint16(sl, uint16(count)) } diff --git a/go/store/val/tuple_test.go b/go/store/val/tuple_test.go index 9c84cd3b37..faa1961ff1 100644 --- a/go/store/val/tuple_test.go +++ b/go/store/val/tuple_test.go @@ -16,6 +16,7 @@ package val import ( "math/rand" + "sort" "testing" "github.com/stretchr/testify/assert" @@ -25,32 +26,75 @@ import ( var testPool = pool.NewBuffPool() +// todo(andy): randomize test seed +var testRand = rand.New(rand.NewSource(1)) + func TestNewTuple(t *testing.T) { t.Run("test tuple round trip", func(t *testing.T) { roundTripBytes(t) }) + t.Run("test tuple get many", func(t *testing.T) { + testTupleGetMany(t) + }) } func roundTripBytes(t *testing.T) { - randomBytes := func(t *testing.T) (fields [][]byte) { - fields = make([][]byte, (rand.Uint32()%19)+1) - assert.True(t, len(fields) > 0) - for i := range fields { - if rand.Uint32()%4 == 0 { - // 25% NULL - continue - } - fields[i] = make([]byte, rand.Uint32()%20) - rand.Read(fields[i]) - } - return - } - for n := 0; n < 100; n++ { - fields := randomBytes(t) + fields := randomByteFields(t) tup := NewTuple(testPool, fields...) for i, field := range fields { assert.Equal(t, field, tup.GetField(i)) } } } + +func testTupleGetMany(t *testing.T) { + for n := 0; n < 100; n++ { + fields := randomByteFields(t) + tup := NewTuple(testPool, fields...) + + indexes := randomFieldIndexes(fields) + actual := tup.GetManyFields(indexes, make([][]byte, len(indexes))) + + for k, idx := range indexes { + exp := fields[idx] + act := actual[k] + assert.Equal(t, exp, act) + } + } +} + +func randomByteFields(t *testing.T) (fields [][]byte) { + fields = make([][]byte, rand.Intn(19)+1) + assert.True(t, len(fields) > 0) + for i := range fields { + if rand.Uint32()%4 == 0 { + // 25% NULL + fields[i] = nil + continue + } + fields[i] = make([]byte, rand.Intn(19)+1) + rand.Read(fields[i]) + } + return +} + +func randomFieldIndexes(fields [][]byte) []int { + indexes := make([]int, len(fields)) + for i := range indexes { + indexes[i] = i + } + + k := testRand.Intn(len(indexes)) + if k == 0 { + k++ + } + + testRand.Shuffle(len(indexes), func(i, j int) { + indexes[i], indexes[j] = indexes[j], indexes[i] + }) + indexes = indexes[:k] + sort.Ints(indexes) + + return indexes +}