[statspro] Avoid copying histograms, perf improvement (#7666)

* [statspro] Avoid unnecessary histogram copies
* bump
* dropped fds and colset
* fix more tests
* merge main
* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh
* bump

Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>

parent 56e261abe7
commit d6aa1e6af0
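The shape of the change is easiest to see in the statspro hunks below: DoltStats no longer keeps its own copies of row counts, types, bounds, and buckets, but wraps a *stats.Statistic from go-mysql-server and stores its buckets as a sql.Histogram of DoltBucket values, each of which embeds a *stats.Bucket. Both types then satisfy the sql.Statistic and sql.HistogramBucket interfaces by delegation, so histograms can be handed to the engine and merged between refreshes without being copied into and out of Dolt-specific structs. A condensed sketch of that pattern follows (field and method names are taken from the diff; the import paths are assumed from the repo layout and the sketch omits most of the delegating methods):

```go
package statspro

import (
	"sync"
	"time"

	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/go-mysql-server/sql/stats"
)

// DoltStats wraps the engine's statistic instead of copying its fields.
type DoltStats struct {
	mu        *sync.Mutex
	Statistic *stats.Statistic  // counts, types, bounds, fds live here
	Hist      sql.Histogram     // buckets kept as the interface type
	Chunks    []hash.Hash       // histogram fanout-level chunk addresses
	Active    map[hash.Hash]int // chunk address -> histogram position
}

// sql.Statistic is satisfied by delegating to the wrapped value.
func (s *DoltStats) RowCount() uint64             { return s.Statistic.RowCount() }
func (s *DoltStats) Qualifier() sql.StatQualifier { return s.Statistic.Qualifier() }
func (s *DoltStats) Histogram() sql.Histogram     { return s.Hist }

// DoltBucket only adds Dolt-specific metadata on top of a shared bucket.
type DoltBucket struct {
	*stats.Bucket
	Chunk   hash.Hash
	Created time.Time
}
```

Because *DoltStats is itself a sql.Statistic, the provider can return it directly (the `ret = append(ret, stat)` and `return stat, true` changes below) instead of converting through toSql, which is where the copying used to happen.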
@@ -57,7 +57,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
-github.com/dolthub/go-mysql-server v0.18.1-0.20240401223252-947e7c377fd3
+github.com/dolthub/go-mysql-server v0.18.1-0.20240402153908-f98252471387
github.com/dolthub/swiss v0.1.0
github.com/goccy/go-json v0.10.2
github.com/google/go-github/v57 v57.0.0

@@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
-github.com/dolthub/go-mysql-server v0.18.1-0.20240401223252-947e7c377fd3 h1:W+E0m/aPEiBFwW7teLHusek2sjTdrpyqWZyiIihH6ik=
-github.com/dolthub/go-mysql-server v0.18.1-0.20240401223252-947e7c377fd3/go.mod h1:SJleIOwC74u9tdUoGgVgM/eLlwVj3sJEFfx0sdStvW0=
+github.com/dolthub/go-mysql-server v0.18.1-0.20240402153908-f98252471387 h1:/611tSrBfDRH38MbrSgdvWZiX++d5txRThBwX0e+l2s=
+github.com/dolthub/go-mysql-server v0.18.1-0.20240402153908-f98252471387/go.mod h1:SJleIOwC74u9tdUoGgVgM/eLlwVj3sJEFfx0sdStvW0=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto=
github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71 h1:bMGS25NWAGTEtT5tOBsCuCrlYnLRKpbJVJkDbrTRhwQ=

@@ -244,7 +244,7 @@ func (n *NomsStatsDatabase) DeleteBranchStats(ctx context.Context, branch string
return nil
}

-func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, _, newChunks []statspro.DoltBucket) error {
+func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error {
var dbStat dbStats
for i, b := range n.branches {
if strings.EqualFold(b, branch) {
@@ -261,12 +261,12 @@ func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qu
}

if _, ok := dbStat[qual]; ok {
-oldChunks := dbStat[qual].Histogram
+oldChunks := dbStat[qual].Hist
targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks)
if err != nil {
return err
}
-dbStat[qual].Histogram = targetBuckets
+dbStat[qual].Hist = targetBuckets
} else {
dbStat[qual] = statspro.NewDoltStats()
}

@@ -109,68 +109,70 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
}

qual := sql.NewStatQualifier(dbName, tableName, indexName)
-if currentStat.Qual.String() != qual.String() {
-if !currentStat.Qual.Empty() {
-currentStat.LowerBound, err = loadLowerBound(ctx, currentStat.Qual)
+if currentStat.Statistic.Qual.String() != qual.String() {
+if !currentStat.Statistic.Qual.Empty() {
+currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
-fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Qual)
+fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
-currentStat.Fds = fds
-currentStat.ColSet = colSet
+currentStat.Statistic.Fds = fds
+currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
-qualToStats[currentStat.Qual] = currentStat
+qualToStats[currentStat.Statistic.Qual] = currentStat
}

currentStat = statspro.NewDoltStats()
-currentStat.Qual = qual
-currentStat.Columns = columns
-currentStat.LowerBound = lowerBound
+currentStat.Statistic.Qual = qual
+currentStat.Statistic.Cols = columns
+currentStat.Statistic.LowerBnd = lowerBound
}

-if currentStat.Histogram == nil {
-currentStat.Types, err = stats.ParseTypeStrings(typs)
+if currentStat.Statistic.Hist == nil {
+currentStat.Statistic.Typs, err = stats.ParseTypeStrings(typs)
if err != nil {
return nil, err
}
-currentStat.Qual = qual
+currentStat.Statistic.Qual = qual
}

bucket := statspro.DoltBucket{
-Chunk: commit,
-RowCount: uint64(rowCount),
-DistinctCount: uint64(distinctCount),
-NullCount: uint64(nullCount),
-CreatedAt: createdAt,
-Mcvs: mcvs,
-McvCount: mcvCnts,
-BoundCount: upperBoundCnt,
-UpperBound: boundRow,
+Chunk: commit,
+Created: createdAt,
+Bucket: &stats.Bucket{
+RowCnt: uint64(rowCount),
+DistinctCnt: uint64(distinctCount),
+NullCnt: uint64(nullCount),
+McvVals: mcvs,
+McvsCnt: mcvCnts,
+BoundCnt: upperBoundCnt,
+BoundVal: boundRow,
+},
}

-currentStat.Histogram = append(currentStat.Histogram, bucket)
-currentStat.RowCount += uint64(rowCount)
-currentStat.DistinctCount += uint64(distinctCount)
-currentStat.NullCount += uint64(rowCount)
-if currentStat.CreatedAt.Before(createdAt) {
-currentStat.CreatedAt = createdAt
+currentStat.Hist = append(currentStat.Hist, bucket)
+currentStat.Statistic.RowCnt += uint64(rowCount)
+currentStat.Statistic.DistinctCnt += uint64(distinctCount)
+currentStat.Statistic.NullCnt += uint64(rowCount)
+if currentStat.Statistic.Created.Before(createdAt) {
+currentStat.Statistic.Created = createdAt
}
}
-currentStat.LowerBound, err = loadLowerBound(ctx, currentStat.Qual)
+currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
-fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Qual)
+fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
-currentStat.Fds = fds
-currentStat.ColSet = colSet
+currentStat.Statistic.Fds = fds
+currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
-qualToStats[currentStat.Qual] = currentStat
+qualToStats[currentStat.Statistic.Qual] = currentStat
return qualToStats, nil
}

@@ -48,7 +48,7 @@ func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *s

keyBuilder := val.NewTupleBuilder(kd)

-qual := dStats.Qual
+qual := dStats.Qualifier()
pool := statsMap.NodeStore().Pool()

// delete previous entries for this index -> (db, table, index, pos)
@@ -92,22 +92,22 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat
keyBuilder := val.NewTupleBuilder(kd)
valueBuilder := val.NewTupleBuilder(vd)

-qual := dStats.Qual
+qual := dStats.Qualifier()
pool := statsMap.NodeStore().Pool()

// now add new buckets
typesB := strings.Builder{}
sep := ""
-for _, t := range dStats.Types {
+for _, t := range dStats.Statistic.Typs {
typesB.WriteString(sep + t.String())
sep = ","
}
typesStr := typesB.String()

var pos int64
-for _, h := range dStats.Histogram {
+for _, h := range dStats.Hist {
var upperBoundElems []string
-for _, v := range h.UpperBound {
+for _, v := range h.UpperBound() {
upperBoundElems = append(upperBoundElems, fmt.Sprintf("%v", v))
}

@@ -117,23 +117,23 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat
keyBuilder.PutInt64(3, pos)

valueBuilder.PutInt64(0, schema.StatsVersion)
-valueBuilder.PutString(1, h.Chunk.String())
-valueBuilder.PutInt64(2, int64(h.RowCount))
-valueBuilder.PutInt64(3, int64(h.DistinctCount))
-valueBuilder.PutInt64(4, int64(h.NullCount))
-valueBuilder.PutString(5, strings.Join(dStats.Columns, ","))
+valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String())
+valueBuilder.PutInt64(2, int64(h.RowCount()))
+valueBuilder.PutInt64(3, int64(h.DistinctCount()))
+valueBuilder.PutInt64(4, int64(h.NullCount()))
+valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
valueBuilder.PutString(6, typesStr)
-valueBuilder.PutString(7, stats.StringifyKey(h.UpperBound, dStats.Types))
-valueBuilder.PutInt64(8, int64(h.BoundCount))
-valueBuilder.PutDatetime(9, h.CreatedAt)
-for i, r := range h.Mcvs {
-valueBuilder.PutString(10+i, stats.StringifyKey(r, dStats.Types))
+valueBuilder.PutString(7, stats.StringifyKey(h.UpperBound(), dStats.Statistic.Typs))
+valueBuilder.PutInt64(8, int64(h.BoundCount()))
+valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
+for i, r := range h.Mcvs() {
+valueBuilder.PutString(10+i, stats.StringifyKey(r, dStats.Statistic.Typs))
}
var mcvCntsRow sql.Row
-for _, v := range h.McvCount {
+for _, v := range h.McvCounts() {
mcvCntsRow = append(mcvCntsRow, int(v))
}
-valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, dStats.Types))
+valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, dStats.Statistic.Typs))

key := keyBuilder.Build(pool)
value := valueBuilder.Build(pool)

@@ -94,7 +94,7 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin
curStat, ok := statDb.GetStat(branch, qual)
if !ok {
curStat = NewDoltStats()
-curStat.Qual = qual
+curStat.Statistic.Qual = qual
}
idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
if err != nil {
@@ -111,7 +111,7 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin
// merge new chunks with preexisting chunks
for _, idxMeta := range idxMetas {
stat := newTableStats[idxMeta.qual]
-targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Histogram)
+targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
if err != nil {
return err
}
@@ -120,7 +120,7 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin
continue
}
stat.Chunks = idxMeta.allAddrs
-stat.Histogram = targetChunks
+stat.Hist = targetChunks
stat.UpdateActive()
if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
return err
@@ -176,7 +176,7 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table,
return indexMeta{}, err
} else if cnt == 0 {
return indexMeta{
-qual: curStats.Qual,
+qual: curStats.Statistic.Qual,
cols: cols,
}, nil
}
@@ -188,7 +188,7 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table,
}

var addrs []hash.Hash
-var keepChunks []DoltBucket
+var keepChunks []sql.HistogramBucket
var missingAddrs float64
var missingChunks []tree.Node
var missingOffsets []updateOrdinal
@@ -210,27 +210,27 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table,
missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
missingAddrs++
} else {
-keepChunks = append(keepChunks, curStats.Histogram[bucketIdx])
+keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
}
offset += uint64(treeCnt)
}

-var dropChunks []DoltBucket
+var dropChunks []sql.HistogramBucket
for _, h := range curStats.Chunks {
var match bool
for _, b := range keepChunks {
-if b.Chunk == h {
+if DoltBucketChunk(b) == h {
match = true
break
}
}
if !match {
-dropChunks = append(dropChunks, curStats.Histogram[curStats.Active[h]])
+dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
}
}

return indexMeta{
-qual: curStats.Qual,
+qual: curStats.Statistic.Qual,
cols: cols,
newNodes: missingChunks,
updateOrdinals: missingOffsets,

@@ -168,18 +168,18 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br
curStat, ok := statDb.GetStat(branch, qual)
if !ok {
curStat = NewDoltStats()
-curStat.Qual = qual
+curStat.Statistic.Qual = qual

cols := make([]string, len(index.Expressions()))
tablePrefix := fmt.Sprintf("%s.", table)
for i, c := range index.Expressions() {
cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
}
-curStat.Columns = cols
+curStat.Statistic.Cols = cols
}
ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())

-updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns)
+updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
if err != nil {
ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
continue
@@ -215,7 +215,7 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br
if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok {
err = statDb.SetStat(ctx, branch, updateMeta.qual, stat)
} else {
-err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Histogram)
+err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist)
}
if err != nil {
return err

@@ -26,31 +26,142 @@ import (
)

type DoltStats struct {
-mu *sync.Mutex
+Statistic *stats.Statistic
+mu *sync.Mutex
// Chunks is a list of addresses for the histogram fanout level
Chunks []hash.Hash
// Active maps a chunk/bucket address to its position in
// the histogram. 1-indexed to differentiate from an empty
// field on disk
Active map[hash.Hash]int
+Hist sql.Histogram
}

-RowCount uint64
-DistinctCount uint64
-NullCount uint64
-AvgSize uint64
-Qual sql.StatQualifier
-CreatedAt time.Time
-Histogram DoltHistogram
-Columns []string
-Types []sql.Type
-IdxClass uint8
-LowerBound sql.Row
-Fds *sql.FuncDepSet
-ColSet sql.ColSet
+var _ sql.Statistic = (*DoltStats)(nil)

+func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) WithRowCount(u uint64) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) WithNullCount(u uint64) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic {
+ret := *s
+ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic)
+return &ret
+}

+func (s *DoltStats) RowCount() uint64 {
+return s.Statistic.RowCount()
+}

+func (s *DoltStats) DistinctCount() uint64 {
+return s.Statistic.DistinctCount()
+}

+func (s *DoltStats) NullCount() uint64 {
+return s.Statistic.NullCount()

+}

+func (s *DoltStats) AvgSize() uint64 {
+return s.Statistic.AvgSize()

+}

+func (s *DoltStats) CreatedAt() time.Time {
+return s.Statistic.CreatedAt()

+}

+func (s *DoltStats) Columns() []string {
+return s.Statistic.Columns()
+}

+func (s *DoltStats) Types() []sql.Type {
+return s.Statistic.Types()
+}

+func (s *DoltStats) Qualifier() sql.StatQualifier {
+return s.Statistic.Qualifier()
+}

+func (s *DoltStats) IndexClass() sql.IndexClass {
+return s.Statistic.IndexClass()
+}

+func (s *DoltStats) FuncDeps() *sql.FuncDepSet {
+return s.Statistic.FuncDeps()
+}

+func (s *DoltStats) ColSet() sql.ColSet {
+return s.Statistic.ColSet()
+}

+func (s *DoltStats) LowerBound() sql.Row {
+return s.Statistic.LowerBound()
+}

func NewDoltStats() *DoltStats {
-return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int)}
+return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
}

+func (s *DoltStats) ToInterface() interface{} {
+ret := s.Statistic.ToInterface().(map[string]interface{})

+var hist sql.Histogram
+for _, b := range s.Hist {
+hist = append(hist, b)
+}
+ret["statistic"].(map[string]interface{})["buckets"] = hist.ToInterface()
+return ret
+}

+func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
+ret := *s
+ret.Hist = nil
+for _, b := range h {
+doltB, ok := b.(DoltBucket)
+if !ok {
+return nil, fmt.Errorf("invalid bucket type: %T", b)
+}
+ret.Hist = append(ret.Hist, doltB)
+}
+return &ret, nil
+}

+func (s *DoltStats) Histogram() sql.Histogram {
+return s.Hist
+}

func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
@@ -58,22 +169,15 @@ func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
if err != nil {
return nil, err
}
-return &DoltStats{
-mu: &sync.Mutex{},
-Qual: stat.Qualifier(),
-RowCount: stat.RowCount(),
-DistinctCount: stat.DistinctCount(),
-NullCount: stat.NullCount(),
-AvgSize: stat.AvgSize(),
-CreatedAt: stat.CreatedAt(),
-Histogram: hist,
-Columns: stat.Columns(),
-Types: stat.Types(),
-IdxClass: uint8(stat.IndexClass()),
-LowerBound: stat.LowerBound(),
-Fds: stat.FuncDeps(),
-ColSet: stat.ColSet(),
-}, nil
+ret := &DoltStats{
+mu: &sync.Mutex{},
+Hist: hist,
+Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()),
+Active: make(map[hash.Hash]int),
+}
+ret.Statistic.Fds = stat.FuncDeps()
+ret.Statistic.Colset = stat.ColSet()
+return ret, nil
}

func (s *DoltStats) UpdateActive() {
@@ -86,49 +190,26 @@ func (s *DoltStats) UpdateActive() {
s.Active = newActive
}

-func (s *DoltStats) updateCounts() {
-s.mu.Lock()
-defer s.mu.Unlock()
-var newDistinct uint64
-var newRows uint64
-var newNulls uint64
-for _, b := range s.Histogram {
-newDistinct += b.DistinctCount
-newRows += b.RowCount
-newNulls += b.NullCount
-}
-s.RowCount = newRows
-s.DistinctCount = newDistinct
-s.NullCount = newNulls
-}

-func (s *DoltStats) toSql() sql.Statistic {
-s.mu.Lock()
-defer s.mu.Unlock()
-typStrs := make([]string, len(s.Types))
-for i, typ := range s.Types {
-typStrs[i] = typ.String()
-}
-stat := stats.NewStatistic(s.RowCount, s.DistinctCount, s.NullCount, s.AvgSize, s.CreatedAt, s.Qual, s.Columns, s.Types, s.Histogram.toSql(), sql.IndexClass(s.IdxClass), s.LowerBound)
-return stat.WithColSet(s.ColSet).WithFuncDeps(s.Fds)
-}

-type DoltHistogram []DoltBucket

type DoltBucket struct {
-Chunk hash.Hash
-RowCount uint64
-DistinctCount uint64
-NullCount uint64
-CreatedAt time.Time
-Mcvs []sql.Row
-McvCount []uint64
-BoundCount uint64
-UpperBound sql.Row
+*stats.Bucket
+Chunk hash.Hash
+Created time.Time
}

-func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (DoltHistogram, error) {
-ret := make([]DoltBucket, len(hist))
+func DoltBucketChunk(b sql.HistogramBucket) hash.Hash {
+return b.(DoltBucket).Chunk
+}

+func DoltBucketCreated(b sql.HistogramBucket) time.Time {
+return b.(DoltBucket).Created
+}

+var _ sql.HistogramBucket = (*DoltBucket)(nil)

+func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) {
+ret := make(sql.Histogram, len(hist))
var err error
for i, b := range hist {
upperBound := make(sql.Row, len(b.UpperBound()))
@@ -149,24 +230,8 @@ func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (DoltHistogram, error
}
}
ret[i] = DoltBucket{
-RowCount: b.RowCount(),
-DistinctCount: b.DistinctCount(),
-NullCount: b.NullCount(),
-Mcvs: mcvs,
-McvCount: b.McvCounts(),
-BoundCount: b.BoundCount(),
-UpperBound: upperBound,
+Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs),
}
}
return ret, nil
}

-func (s DoltHistogram) toSql() []*stats.Bucket {
-ret := make([]*stats.Bucket, len(s))
-for i, b := range s {
-upperBound := make([]interface{}, len(b.UpperBound))
-copy(upperBound, b.UpperBound)
-ret[i] = stats.NewHistogramBucket(b.RowCount, b.DistinctCount, b.NullCount, b.BoundCount, upperBound, b.McvCount, b.Mcvs)
-}
-return ret
-}

@@ -45,7 +45,7 @@ type Database interface {
DeleteStats(branch string, quals ...sql.StatQualifier)
// ReplaceChunks is an update interface that lets a stats implementation
// decide how to edit stats for a stats refresh.
-ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []DoltBucket) error
+ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error
// Flush instructs the database to sync any partial state to disk
Flush(ctx context.Context, branch string) error
// Close finalizes any file references.

@@ -38,8 +38,8 @@ type indexMeta struct {
newNodes []tree.Node
// updateOrdinals are [start, stop] tuples for each update chunk
updateOrdinals []updateOrdinal
-keepChunks []DoltBucket
-dropChunks []DoltBucket
+keepChunks []sql.HistogramBucket
+dropChunks []sql.HistogramBucket
allAddrs []hash.Hash
}

@@ -160,7 +160,7 @@ func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, table string)
for _, qual := range statDb.ListStatQuals(branch) {
if strings.EqualFold(db, qual.Database) && strings.EqualFold(table, qual.Tab) {
stat, _ := statDb.GetStat(branch, qual)
-ret = append(ret, stat.toSql())
+ret = append(ret, stat)
}
}

@@ -224,7 +224,7 @@ func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string
if !ok {
return nil, false
}
-return stat.toSql(), true
+return stat, true
}

func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error {
@@ -299,7 +299,7 @@ func (p *Provider) RowCount(ctx *sql.Context, db, table string) (uint64, error)
return 0, nil
}

-return priStats.RowCount, nil
+return priStats.RowCount(), nil
}

func (p *Provider) DataLength(ctx *sql.Context, db, table string) (uint64, error) {
@@ -322,5 +322,5 @@ func (p *Provider) DataLength(ctx *sql.Context, db, table string) (uint64, error
return 0, nil
}

-return priStats.AvgSize, nil
+return priStats.AvgSize(), nil
}

@@ -82,13 +82,13 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
} else if cnt == 0 {
// table is empty
ret[meta.qual] = NewDoltStats()
-ret[meta.qual].CreatedAt = time.Now()
-ret[meta.qual].Columns = meta.cols
-ret[meta.qual].Types = types
-ret[meta.qual].Qual = meta.qual
+ret[meta.qual].Statistic.Created = time.Now()
+ret[meta.qual].Statistic.Cols = meta.cols
+ret[meta.qual].Statistic.Typs = types
+ret[meta.qual].Statistic.Qual = meta.qual

-ret[meta.qual].Fds = fds
-ret[meta.qual].ColSet = colSet
+ret[meta.qual].Statistic.Fds = fds
+ret[meta.qual].Statistic.Colset = colSet
continue
}

@@ -100,10 +100,10 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc())
ret[meta.qual] = NewDoltStats()
ret[meta.qual].Chunks = meta.allAddrs
-ret[meta.qual].CreatedAt = time.Now()
-ret[meta.qual].Columns = meta.cols
-ret[meta.qual].Types = types
-ret[meta.qual].Qual = meta.qual
+ret[meta.qual].Statistic.Created = time.Now()
+ret[meta.qual].Statistic.Cols = meta.cols
+ret[meta.qual].Statistic.Typs = types
+ret[meta.qual].Statistic.Qual = meta.qual

var start, stop uint64
// read leaf rows for each bucket
@@ -140,14 +140,14 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
return nil, err
}
bucket.Chunk = chunk.HashOf()
-ret[updater.qual].Histogram = append(ret[updater.qual].Histogram, bucket)
+ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket)
}

-ret[updater.qual].DistinctCount = uint64(updater.globalDistinct)
-ret[updater.qual].RowCount = uint64(updater.globalCount)
-ret[updater.qual].LowerBound = firstRow
-ret[updater.qual].Fds = fds
-ret[updater.qual].ColSet = colSet
+ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct)
+ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount)
+ret[updater.qual].Statistic.LowerBnd = firstRow
+ret[updater.qual].Statistic.Fds = fds
+ret[updater.qual].Statistic.Colset = colSet
ret[updater.qual].UpdateActive()
}
return ret, nil
@@ -156,22 +156,22 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
// MergeNewChunks combines a set of old and new chunks to create
// the desired target histogram. Undefined behavior if a |targetHash|
// does not exist in either |oldChunks| or |newChunks|.
-func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []DoltBucket) ([]DoltBucket, error) {
+func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) {
hashToPos := make(map[hash.Hash]int, len(inputHashes))
for i, h := range inputHashes {
hashToPos[h] = i
}

var cnt int
-targetBuckets := make([]DoltBucket, len(inputHashes))
+targetBuckets := make([]sql.HistogramBucket, len(inputHashes))
for _, c := range oldChunks {
-if idx, ok := hashToPos[c.Chunk]; ok {
+if idx, ok := hashToPos[DoltBucketChunk(c)]; ok {
cnt++
targetBuckets[idx] = c
}
}
for _, c := range newChunks {
-if idx, ok := hashToPos[c.Chunk]; ok && targetBuckets[idx].Chunk.IsEmpty() {
+if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil {
cnt++
targetBuckets[idx] = c
}
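The comment above states MergeNewChunks's contract; the result follows the |inputHashes| order, preferring buckets from the previous histogram and falling back to freshly built ones. A hedged usage sketch of that ordering (mergeExample, the hashes h1–h3, and the bucket values are made up for illustration; the field names come from the diff):

```go
// mergeExample is hypothetical; real callers pass chunk addresses gathered
// from the index's histogram fanout level (see newIdxMeta above).
func mergeExample(h1, h2, h3 hash.Hash) ([]sql.HistogramBucket, error) {
	old1 := statspro.DoltBucket{Chunk: h1, Bucket: &stats.Bucket{RowCnt: 10}}
	old2 := statspro.DoltBucket{Chunk: h2, Bucket: &stats.Bucket{RowCnt: 20}}
	new3 := statspro.DoltBucket{Chunk: h3, Bucket: &stats.Bucket{RowCnt: 30}}

	// Result follows the inputHashes order: [old1, old2, new3]. Old buckets
	// win for unchanged chunks; new buckets fill the slots still nil after
	// the first pass.
	return statspro.MergeNewChunks(
		[]hash.Hash{h1, h2, h3},
		[]sql.HistogramBucket{old1, old2},
		[]sql.HistogramBucket{new3},
	)
}
```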
@@ -280,13 +280,15 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu
}
}
return DoltBucket{
-RowCount: uint64(u.count),
-DistinctCount: uint64(u.distinct),
-BoundCount: uint64(u.currentCnt),
-Mcvs: mcvRows,
-McvCount: u.mcvs.Counts(),
-UpperBound: upperBound,
-NullCount: uint64(u.nulls),
+Bucket: &stats.Bucket{
+RowCnt: uint64(u.count),
+DistinctCnt: uint64(u.distinct),
+BoundCnt: uint64(u.currentCnt),
+McvVals: mcvRows,
+McvsCnt: u.mcvs.Counts(),
+BoundVal: upperBound,
+NullCnt: uint64(u.nulls),
+},
}, nil
}

@@ -21,6 +21,7 @@ import (
"testing"

"github.com/dolthub/go-mysql-server/sql"
+"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

@@ -66,109 +67,109 @@ func TestBucketBuilder(t *testing.T) {
name: "ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
-bucket: DoltBucket{
-RowCount: 15,
-DistinctCount: 5,
-Mcvs: []sql.Row{{int64(4)}, {int64(2)}, {int64(3)}},
-McvCount: []uint64{3, 4, 3},
-UpperBound: sql.Row{int64(5)},
-BoundCount: 2,
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 15,
+DistinctCnt: 5,
+McvVals: []sql.Row{{int64(4)}, {int64(2)}, {int64(3)}},
+McvsCnt: []uint64{3, 4, 3},
+BoundVal: sql.Row{int64(5)},
+BoundCnt: 2,
+}},
},
{
// technically nulls should be at beginning
name: "ints with middle nulls",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
-bucket: DoltBucket{
-RowCount: 16,
-DistinctCount: 6,
-NullCount: 3,
-Mcvs: []sql.Row{{int64(4)}, {int64(2)}, {nil}},
-McvCount: []uint64{3, 4, 3},
-UpperBound: sql.Row{int64(5)},
-BoundCount: 2,
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 16,
+DistinctCnt: 6,
+NullCnt: 3,
+McvVals: []sql.Row{{int64(4)}, {int64(2)}, {nil}},
+McvsCnt: []uint64{3, 4, 3},
+BoundVal: sql.Row{int64(5)},
+BoundCnt: 2,
+}},
},
{
name: "ints with beginning nulls",
keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
-bucket: DoltBucket{
-RowCount: 15,
-DistinctCount: 6,
-NullCount: 2,
-Mcvs: []sql.Row{{int64(3)}, {int64(4)}, {int64(2)}},
-McvCount: []uint64{3, 3, 4},
-UpperBound: sql.Row{int64(5)},
-BoundCount: 2,
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 15,
+DistinctCnt: 6,
+NullCnt: 2,
+McvVals: []sql.Row{{int64(3)}, {int64(4)}, {int64(2)}},
+McvsCnt: []uint64{3, 3, 4},
+BoundVal: sql.Row{int64(5)},
+BoundCnt: 2,
+}},
},
{
name: "more ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
-bucket: DoltBucket{
-RowCount: 22,
-DistinctCount: 7,
-BoundCount: 1,
-Mcvs: []sql.Row{{int64(2)}, {int64(6)}, {int64(5)}},
-McvCount: []uint64{4, 4, 4},
-UpperBound: sql.Row{int64(7)},
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 22,
+DistinctCnt: 7,
+BoundCnt: 1,
+McvVals: []sql.Row{{int64(2)}, {int64(6)}, {int64(5)}},
+McvsCnt: []uint64{4, 4, 4},
+BoundVal: sql.Row{int64(7)},
+}},
},
{
name: "2-ints",
keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
-bucket: DoltBucket{
-RowCount: 15,
-DistinctCount: 11,
-Mcvs: []sql.Row{{int64(1), int64(1)}, {int64(4), int64(1)}, {int64(2), int64(3)}},
-McvCount: []uint64{2, 3, 2},
-UpperBound: sql.Row{int64(5), int64(2)},
-BoundCount: 1,
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 15,
+DistinctCnt: 11,
+McvVals: []sql.Row{{int64(1), int64(1)}, {int64(4), int64(1)}, {int64(2), int64(3)}},
+McvsCnt: []uint64{2, 3, 2},
+BoundVal: sql.Row{int64(5), int64(2)},
+BoundCnt: 1,
+}},
},
{
name: "2-ints with nulls",
keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}),
-bucket: DoltBucket{
-RowCount: 5,
-DistinctCount: 5,
-NullCount: 3,
-Mcvs: []sql.Row{{int64(2), int64(2)}, {int64(1), nil}, {int64(1), int64(2)}},
-McvCount: []uint64{1, 1, 1},
-UpperBound: sql.Row{int64(2), int64(2)},
-BoundCount: 1,
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 5,
+DistinctCnt: 5,
+NullCnt: 3,
+McvVals: []sql.Row{{int64(2), int64(2)}, {int64(1), nil}, {int64(1), int64(2)}},
+McvsCnt: []uint64{1, 1, 1},
+BoundVal: sql.Row{int64(2), int64(2)},
+BoundCnt: 1},
},
},
{
name: "varchars",
keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}),
-bucket: DoltBucket{
-RowCount: 15,
-DistinctCount: 9,
-Mcvs: []sql.Row{{"i"}, {"h"}, {"g"}},
-McvCount: []uint64{2, 3, 3},
-UpperBound: sql.Row{"i"},
-BoundCount: 2,
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 15,
+DistinctCnt: 9,
+McvVals: []sql.Row{{"i"}, {"h"}, {"g"}},
+McvsCnt: []uint64{2, 3, 3},
+BoundVal: sql.Row{"i"},
+BoundCnt: 2,
+}},
},
{
name: "varchar-ints",
keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
-bucket: DoltBucket{
-RowCount: 15,
-DistinctCount: 12,
-Mcvs: []sql.Row{{"i", int64(1)}, {"g", int64(2)}, {"h", int64(1)}},
-McvCount: []uint64{2, 2, 2},
-UpperBound: sql.Row{"i", int64(1)},
-BoundCount: 2,
-},
+bucket: DoltBucket{Bucket: &stats.Bucket{
+RowCnt: 15,
+DistinctCnt: 12,
+McvVals: []sql.Row{{"i", int64(1)}, {"g", int64(2)}, {"h", int64(1)}},
+McvsCnt: []uint64{2, 2, 2},
+BoundVal: sql.Row{"i", int64(1)},
+BoundCnt: 2,
+}},
},
}

@@ -190,13 +191,13 @@ func TestBucketBuilder(t *testing.T) {
bucket, err := b.finalize(ctx, nil)
require.NoError(t, err)

-require.Equal(t, int(tt.bucket.RowCount), int(bucket.RowCount))
-require.Equal(t, int(tt.bucket.NullCount), int(bucket.NullCount))
-require.Equal(t, int(tt.bucket.DistinctCount), int(bucket.DistinctCount))
-require.Equal(t, int(tt.bucket.BoundCount), int(bucket.BoundCount))
-require.Equal(t, tt.bucket.UpperBound, bucket.UpperBound)
-require.Equal(t, tt.bucket.McvCount, bucket.McvCount)
-require.Equal(t, tt.bucket.Mcvs, bucket.Mcvs)
+require.Equal(t, int(tt.bucket.RowCount()), int(bucket.RowCount()))
+require.Equal(t, int(tt.bucket.NullCount()), int(bucket.NullCount()))
+require.Equal(t, int(tt.bucket.DistinctCount()), int(bucket.DistinctCount()))
+require.Equal(t, int(tt.bucket.BoundCount()), int(bucket.BoundCount()))
+require.Equal(t, tt.bucket.UpperBound(), bucket.UpperBound())
+require.Equal(t, tt.bucket.McvsCnt, bucket.McvsCnt)
+require.Equal(t, tt.bucket.Mcvs(), bucket.Mcvs())
})
}
}

@@ -168,7 +168,7 @@ type ChunkStoreGarbageCollector interface {
BeginGC(addChunk func(hash.Hash) bool) error

// EndGC indicates that the GC is over. The previously provided
-// addChunk function must not be called after this function function.
+// addChunk function must not be called after this function.
EndGC()

// MarkAndSweepChunks is expected to read chunk addresses off of

@@ -100,7 +100,9 @@ type NomsBlockStore struct {

cond *sync.Cond
gcInProgress bool
-keeperFunc func(hash.Hash) bool
+// keeperFunc is set when |gcInProgress| and appends to the GC sweep queue
+// or blocks on GC finalize
+keeperFunc func(hash.Hash) bool

mtSize uint64
putCount uint64