From b4c5ccf46195f6ec38e4d299ccfd6bf96a35f3bd Mon Sep 17 00:00:00 2001 From: Maximilian Hoffman Date: Wed, 21 Aug 2024 13:13:07 -0700 Subject: [PATCH] Skip filter iter, with more test fixes (#8288) * Revert "Revert "Skip filterIter match check when a key range is contiguous (#8242)"" This reverts commit 277c1d7d8b42de4338b06305cc4d78c90755c775. * fix string matching bug, test added in GMS#2639 * Better comment --- .../doltcore/sqle/index/dolt_index.go | 26 ++++++++++++++++--- .../doltcore/sqle/index/index_reader.go | 2 +- go/libraries/doltcore/sqle/kvexec/builder.go | 2 +- go/store/prolly/tuple_map.go | 10 ++++++- go/store/prolly/tuple_range.go | 9 +++++++ 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/go/libraries/doltcore/sqle/index/dolt_index.go b/go/libraries/doltcore/sqle/index/dolt_index.go index 9f06bcd732..4e82cb3a68 100644 --- a/go/libraries/doltcore/sqle/index/dolt_index.go +++ b/go/libraries/doltcore/sqle/index/dolt_index.go @@ -1206,7 +1206,14 @@ func (di *doltIndex) prollyRangesFromSqlRanges(ctx context.Context, ns tree.Node pranges := make([]prolly.Range, len(ranges)) for k, rng := range ranges { fields := make([]prolly.RangeField, len(rng)) + skipRangeMatchCallback := true for j, expr := range rng { + if !sqltypes.IsInteger(expr.Typ) { + // String, decimal, float, datetime ranges can return + // false positive prefix matches. More precise range.Matches + // comparison is required. + skipRangeMatchCallback = false + } if rangeCutIsBinding(expr.LowerBound) { // accumulate bound values in |tb| v, err := getRangeCutValue(expr.LowerBound, rng[j].Typ) @@ -1266,6 +1273,8 @@ func (di *doltIndex) prollyRangesFromSqlRanges(ctx context.Context, ns tree.Node } order := di.keyBld.Desc.Comparator() + var foundDiscontinuity bool + var isContiguous bool = true for i, field := range fields { // lookups on non-unique indexes can't be point lookups typ := di.keyBld.Desc.Types[i] @@ -1279,11 +1288,22 @@ func (di *doltIndex) prollyRangesFromSqlRanges(ctx context.Context, ns tree.Node // infinity bound fields[i].BoundsAreEqual = false } + + nilBound := field.Lo.Value == nil && field.Hi.Value == nil + if foundDiscontinuity || nilBound { + // A discontinous variable followed by any restriction + // can partition the key space. + isContiguous = false + } + foundDiscontinuity = foundDiscontinuity || !fields[i].BoundsAreEqual || nilBound + } pranges[k] = prolly.Range{ - Fields: fields, - Desc: di.keyBld.Desc, - Tup: tup, + Fields: fields, + Desc: di.keyBld.Desc, + Tup: tup, + SkipRangeMatchCallback: skipRangeMatchCallback, + IsContiguous: isContiguous, } } return pranges, nil diff --git a/go/libraries/doltcore/sqle/index/index_reader.go b/go/libraries/doltcore/sqle/index/index_reader.go index b33f446c29..e30a6ad3aa 100644 --- a/go/libraries/doltcore/sqle/index/index_reader.go +++ b/go/libraries/doltcore/sqle/index/index_reader.go @@ -421,7 +421,7 @@ type coveringIndexImplBuilder struct { keyMap, valMap, ordMap val.OrdinalMapping } -func NewSequenceMapIter(ctx context.Context, ib IndexScanBuilder, ranges []prolly.Range, reverse bool) (prolly.MapIter, error) { +func NewSequenceRangeIter(ctx context.Context, ib IndexScanBuilder, ranges []prolly.Range, reverse bool) (prolly.MapIter, error) { if len(ranges) == 0 { return &strictLookupIter{}, nil } diff --git a/go/libraries/doltcore/sqle/kvexec/builder.go b/go/libraries/doltcore/sqle/kvexec/builder.go index ae22c10553..af0cd87079 100644 --- a/go/libraries/doltcore/sqle/kvexec/builder.go +++ b/go/libraries/doltcore/sqle/kvexec/builder.go @@ -326,7 +326,7 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M return prolly.Map{}, nil, nil, nil, nil, nil, err } - srcIter, err = index.NewSequenceMapIter(ctx, lb, prollyRanges, l.IsReverse) + srcIter, err = index.NewSequenceRangeIter(ctx, lb, prollyRanges, l.IsReverse) if err != nil { return prolly.Map{}, nil, nil, nil, nil, nil, err } diff --git a/go/store/prolly/tuple_map.go b/go/store/prolly/tuple_map.go index 1323ea1d5d..38bb73a5b9 100644 --- a/go/store/prolly/tuple_map.go +++ b/go/store/prolly/tuple_map.go @@ -314,7 +314,15 @@ func (m Map) IterRange(ctx context.Context, rng Range) (iter MapIter, err error) } else { iter, err = treeIterFromRange(ctx, m.tuples.Root, m.tuples.NodeStore, rng) } - return filteredIter{iter: iter, rng: rng}, nil + if err != nil { + return nil, err + } + if !rng.SkipRangeMatchCallback || !rng.IsContiguous { + // range.Matches check is required if a type is imprecise + // or a key range is non-contiguous on disk + iter = filteredIter{iter: iter, rng: rng} + } + return iter, nil } // IterRangeReverse returns a mutableMapIter that iterates over a Range backwards. diff --git a/go/store/prolly/tuple_range.go b/go/store/prolly/tuple_range.go index 32af35dbcf..e9fde518e2 100644 --- a/go/store/prolly/tuple_range.go +++ b/go/store/prolly/tuple_range.go @@ -58,6 +58,15 @@ type Range struct { Fields []RangeField Desc val.TupleDesc Tup val.Tuple + // SkipRangeMatchCallback is false if any type in the index range + // expression can return a false positive match. Strings, datetimes, + // floats, and decimals ranges can prefix match invalid values. + SkipRangeMatchCallback bool + // IsContiguous indicates whether this range expression is a + // single contiguous set of keys on disk. Permit a sequence of + // (1) zero or more equality restrictions, (2) zero or one + // non-equality, and (3) no further restrictions. + IsContiguous bool } // RangeField bounds one dimension of a Range.