use ordinal bounds for noms and prolly table partitions

This commit is contained in:
Andy Arthur
2022-03-28 17:19:52 -07:00
parent eb9ebcda63
commit b8d0e90187
3 changed files with 13 additions and 55 deletions

View File

@@ -65,16 +65,11 @@ type prollyRowIter struct {
var _ sql.RowIter = prollyRowIter{}
var _ sql.RowIter2 = prollyRowIter{}
func NewProllyRowIter(ctx context.Context, sch schema.Schema, rows prolly.Map, rng prolly.Range, projections []string) (sql.RowIter, error) {
func NewProllyRowIter(ctx context.Context, sch schema.Schema, rows prolly.Map, iter prolly.MapRangeIter, projections []string) (sql.RowIter, error) {
if schema.IsKeyless(sch) {
return nil, errors.New("format __DOLT_1__ does not support keyless tables")
}
iter, err := rows.IterRange(ctx, rng)
if err != nil {
return nil, err
}
return rowIterFromMapIter(ctx, sch, rows, iter, projections)
}

View File

@@ -25,7 +25,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
"github.com/dolthub/dolt/go/libraries/doltcore/table"
"github.com/dolthub/dolt/go/libraries/utils/set"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/types"
)
@@ -175,7 +174,13 @@ func ProllyRowIterFromPartition(ctx context.Context, tbl *doltdb.Table, projecti
if err != nil {
return nil, err
}
return index.NewProllyRowIter(ctx, sch, rows, partition.rowRange, projections)
iter, err := rows.IterOrdinalRange(ctx, partition.start, partition.end)
if err != nil {
return nil, err
}
return index.NewProllyRowIter(ctx, sch, rows, iter, projections)
}
// Returns a |sql.RowIter| for a full table scan for the given |table|. If
@@ -200,11 +205,5 @@ func TableToRowIter(ctx *sql.Context, table *WritableDoltTable, columns []string
rowData: data,
}
if types.IsFormat_DOLT_1(data.Format()) {
m := durable.ProllyMapFromIndex(data)
kd, _ := m.Descriptors()
p.rowRange = prolly.Range{Start: nil, Stop: nil, Desc: kd}
}
return newRowIterator(ctx, t, columns, p)
}

View File

@@ -41,7 +41,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor/creation"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/types"
)
@@ -688,9 +687,6 @@ type doltTablePartition struct {
// half-open index range of partition: [start, end)
start, end uint64
// value range of partition for "prolly" implementation
rowRange prolly.Range
rowData durable.Index
}
@@ -701,21 +697,11 @@ func partitionsFromRows(ctx context.Context, rows durable.Index) []doltTablePart
}
}
nbf := rows.Format()
switch nbf {
case types.Format_LD_1, types.Format_7_18, types.Format_DOLT_DEV:
nm := durable.NomsMapFromIndex(rows)
return partitionsFromNomsRows(nm, durable.VrwFromNomsIndex(rows))
case types.Format_DOLT_1:
return partitionsFromProllyRows(rows)
}
return nil
return partitionsFromTableRows(rows)
}
func partitionsFromNomsRows(rows types.Map, vrw types.ValueReadWriter) []doltTablePartition {
numElements := rows.Len()
func partitionsFromTableRows(rows durable.Index) []doltTablePartition {
numElements := rows.Count()
itemsPerPartition := MaxRowsPerPartition
numPartitions := (numElements / itemsPerPartition) + 1
@@ -734,41 +720,19 @@ func partitionsFromNomsRows(rows types.Map, vrw types.ValueReadWriter) []doltTab
partitions[i] = doltTablePartition{
start: i * itemsPerPartition,
end: (i + 1) * itemsPerPartition,
rowData: durable.IndexFromNomsMap(rows, vrw),
rowData: rows,
}
}
partitions[numPartitions-1] = doltTablePartition{
start: (numPartitions - 1) * itemsPerPartition,
end: numElements,
rowData: durable.IndexFromNomsMap(rows, vrw),
rowData: rows,
}
return partitions
}
// partitionsFromProllyRows builds one doltTablePartition per key returned by
// prolly.PartitionKeysFromMap for the prolly map backing |rows|. The first
// partition's range comes from prolly.LesserOrEqualRange on the first key;
// partition i (i > 0) gets prolly.OpenStartRange between keys i-1 and i.
// Every partition shares the same |rows| as its rowData.
//
// NOTE(review): indexes the first partition key unconditionally, so this
// assumes PartitionKeysFromMap never returns an empty slice — confirm.
func partitionsFromProllyRows(rows durable.Index) []doltTablePartition {
	m := durable.ProllyMapFromIndex(rows)
	kd, _ := m.Descriptors()

	// naively divide map by top-level keys
	bounds := prolly.PartitionKeysFromMap(m)

	head := prolly.LesserOrEqualRange(bounds[0], kd)
	partitions := make([]doltTablePartition, len(bounds))
	partitions[0] = doltTablePartition{rowRange: head, rowData: rows}

	// Slot 0 is already filled; each later partition spans from the previous
	// key to its own key.
	for i := 1; i < len(partitions); i++ {
		partitions[i] = doltTablePartition{
			rowRange: prolly.OpenStartRange(bounds[i-1], bounds[i], kd),
			rowData:  rows,
		}
	}
	return partitions
}
// Key returns the key for this partition, which must uniquely identify the partition.
func (p doltTablePartition) Key() []byte {
return []byte(strconv.FormatUint(p.start, 10) + " >= i < " + strconv.FormatUint(p.end, 10))