diff --git a/go/go.mod b/go/go.mod index 78c68929c9..9422976225 100644 --- a/go/go.mod +++ b/go/go.mod @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash v1.1.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.18.1-0.20240401223252-947e7c377fd3 + github.com/dolthub/go-mysql-server v0.18.1-0.20240402153908-f98252471387 github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 github.com/google/go-github/v57 v57.0.0 diff --git a/go/go.sum b/go/go.sum index d2ad2198e1..e283bd11d3 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= -github.com/dolthub/go-mysql-server v0.18.1-0.20240401223252-947e7c377fd3 h1:W+E0m/aPEiBFwW7teLHusek2sjTdrpyqWZyiIihH6ik= -github.com/dolthub/go-mysql-server v0.18.1-0.20240401223252-947e7c377fd3/go.mod h1:SJleIOwC74u9tdUoGgVgM/eLlwVj3sJEFfx0sdStvW0= +github.com/dolthub/go-mysql-server v0.18.1-0.20240402153908-f98252471387 h1:/611tSrBfDRH38MbrSgdvWZiX++d5txRThBwX0e+l2s= +github.com/dolthub/go-mysql-server v0.18.1-0.20240402153908-f98252471387/go.mod h1:SJleIOwC74u9tdUoGgVgM/eLlwVj3sJEFfx0sdStvW0= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto= github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71 h1:bMGS25NWAGTEtT5tOBsCuCrlYnLRKpbJVJkDbrTRhwQ= diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go index 
bb34e703a5..4565062f70 100644 --- a/go/libraries/doltcore/sqle/statsnoms/database.go +++ b/go/libraries/doltcore/sqle/statsnoms/database.go @@ -244,7 +244,7 @@ func (n *NomsStatsDatabase) DeleteBranchStats(ctx context.Context, branch string return nil } -func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, _, newChunks []statspro.DoltBucket) error { +func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error { var dbStat dbStats for i, b := range n.branches { if strings.EqualFold(b, branch) { @@ -261,12 +261,12 @@ func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qu } if _, ok := dbStat[qual]; ok { - oldChunks := dbStat[qual].Histogram + oldChunks := dbStat[qual].Hist targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks) if err != nil { return err } - dbStat[qual].Histogram = targetBuckets + dbStat[qual].Hist = targetBuckets } else { dbStat[qual] = statspro.NewDoltStats() } diff --git a/go/libraries/doltcore/sqle/statsnoms/load.go b/go/libraries/doltcore/sqle/statsnoms/load.go index d73b23e3c8..6188ed4399 100644 --- a/go/libraries/doltcore/sqle/statsnoms/load.go +++ b/go/libraries/doltcore/sqle/statsnoms/load.go @@ -109,68 +109,70 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St } qual := sql.NewStatQualifier(dbName, tableName, indexName) - if currentStat.Qual.String() != qual.String() { - if !currentStat.Qual.Empty() { - currentStat.LowerBound, err = loadLowerBound(ctx, currentStat.Qual) + if currentStat.Statistic.Qual.String() != qual.String() { + if !currentStat.Statistic.Qual.Empty() { + currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual) if err != nil { return nil, err } - fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Qual) + fds, colSet, 
err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual) if err != nil { return nil, err } - currentStat.Fds = fds - currentStat.ColSet = colSet + currentStat.Statistic.Fds = fds + currentStat.Statistic.Colset = colSet currentStat.UpdateActive() - qualToStats[currentStat.Qual] = currentStat + qualToStats[currentStat.Statistic.Qual] = currentStat } currentStat = statspro.NewDoltStats() - currentStat.Qual = qual - currentStat.Columns = columns - currentStat.LowerBound = lowerBound + currentStat.Statistic.Qual = qual + currentStat.Statistic.Cols = columns + currentStat.Statistic.LowerBnd = lowerBound } - if currentStat.Histogram == nil { - currentStat.Types, err = stats.ParseTypeStrings(typs) + if currentStat.Hist == nil { + currentStat.Statistic.Typs, err = stats.ParseTypeStrings(typs) if err != nil { return nil, err } - currentStat.Qual = qual + currentStat.Statistic.Qual = qual } bucket := statspro.DoltBucket{ - Chunk: commit, - RowCount: uint64(rowCount), - DistinctCount: uint64(distinctCount), - NullCount: uint64(nullCount), - CreatedAt: createdAt, - Mcvs: mcvs, - McvCount: mcvCnts, - BoundCount: upperBoundCnt, - UpperBound: boundRow, + Chunk: commit, + Created: createdAt, + Bucket: &stats.Bucket{ + RowCnt: uint64(rowCount), + DistinctCnt: uint64(distinctCount), + NullCnt: uint64(nullCount), + McvVals: mcvs, + McvsCnt: mcvCnts, + BoundCnt: upperBoundCnt, + BoundVal: boundRow, + }, } - currentStat.Histogram = append(currentStat.Histogram, bucket) - currentStat.RowCount += uint64(rowCount) - currentStat.DistinctCount += uint64(distinctCount) - currentStat.NullCount += uint64(rowCount) - if currentStat.CreatedAt.Before(createdAt) { - currentStat.CreatedAt = createdAt + currentStat.Hist = append(currentStat.Hist, bucket) + currentStat.Statistic.RowCnt += uint64(rowCount) + currentStat.Statistic.DistinctCnt += uint64(distinctCount) + currentStat.Statistic.NullCnt += uint64(nullCount) + if currentStat.Statistic.Created.Before(createdAt) { +
currentStat.Statistic.Created = createdAt } } - currentStat.LowerBound, err = loadLowerBound(ctx, currentStat.Qual) + currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual) if err != nil { return nil, err } - fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Qual) + fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual) if err != nil { return nil, err } - currentStat.Fds = fds - currentStat.ColSet = colSet + currentStat.Statistic.Fds = fds + currentStat.Statistic.Colset = colSet currentStat.UpdateActive() - qualToStats[currentStat.Qual] = currentStat + qualToStats[currentStat.Statistic.Qual] = currentStat return qualToStats, nil } diff --git a/go/libraries/doltcore/sqle/statsnoms/write.go b/go/libraries/doltcore/sqle/statsnoms/write.go index aa8fdc9f31..3e87ed0104 100644 --- a/go/libraries/doltcore/sqle/statsnoms/write.go +++ b/go/libraries/doltcore/sqle/statsnoms/write.go @@ -48,7 +48,7 @@ func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *s keyBuilder := val.NewTupleBuilder(kd) - qual := dStats.Qual + qual := dStats.Qualifier() pool := statsMap.NodeStore().Pool() // delete previous entries for this index -> (db, table, index, pos) @@ -92,22 +92,22 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat keyBuilder := val.NewTupleBuilder(kd) valueBuilder := val.NewTupleBuilder(vd) - qual := dStats.Qual + qual := dStats.Qualifier() pool := statsMap.NodeStore().Pool() // now add new buckets typesB := strings.Builder{} sep := "" - for _, t := range dStats.Types { + for _, t := range dStats.Statistic.Typs { typesB.WriteString(sep + t.String()) sep = "," } typesStr := typesB.String() var pos int64 - for _, h := range dStats.Histogram { + for _, h := range dStats.Hist { var upperBoundElems []string - for _, v := range h.UpperBound { + for _, v := range h.UpperBound() { upperBoundElems = append(upperBoundElems, fmt.Sprintf("%v", v)) } @@ -117,23 +117,23 @@ func 
putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat keyBuilder.PutInt64(3, pos) valueBuilder.PutInt64(0, schema.StatsVersion) - valueBuilder.PutString(1, h.Chunk.String()) - valueBuilder.PutInt64(2, int64(h.RowCount)) - valueBuilder.PutInt64(3, int64(h.DistinctCount)) - valueBuilder.PutInt64(4, int64(h.NullCount)) - valueBuilder.PutString(5, strings.Join(dStats.Columns, ",")) + valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String()) + valueBuilder.PutInt64(2, int64(h.RowCount())) + valueBuilder.PutInt64(3, int64(h.DistinctCount())) + valueBuilder.PutInt64(4, int64(h.NullCount())) + valueBuilder.PutString(5, strings.Join(dStats.Columns(), ",")) valueBuilder.PutString(6, typesStr) - valueBuilder.PutString(7, stats.StringifyKey(h.UpperBound, dStats.Types)) - valueBuilder.PutInt64(8, int64(h.BoundCount)) - valueBuilder.PutDatetime(9, h.CreatedAt) - for i, r := range h.Mcvs { - valueBuilder.PutString(10+i, stats.StringifyKey(r, dStats.Types)) + valueBuilder.PutString(7, stats.StringifyKey(h.UpperBound(), dStats.Statistic.Typs)) + valueBuilder.PutInt64(8, int64(h.BoundCount())) + valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h)) + for i, r := range h.Mcvs() { + valueBuilder.PutString(10+i, stats.StringifyKey(r, dStats.Statistic.Typs)) } var mcvCntsRow sql.Row - for _, v := range h.McvCount { + for _, v := range h.McvCounts() { mcvCntsRow = append(mcvCntsRow, int(v)) } - valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, dStats.Types)) + valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, dStats.Statistic.Typs)) key := keyBuilder.Build(pool) value := valueBuilder.Build(pool) diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go index 0b436ea2b5..35b69ded71 100644 --- a/go/libraries/doltcore/sqle/statspro/analyze.go +++ b/go/libraries/doltcore/sqle/statspro/analyze.go @@ -94,7 +94,7 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin 
curStat, ok := statDb.GetStat(branch, qual) if !ok { curStat = NewDoltStats() - curStat.Qual = qual + curStat.Statistic.Qual = qual } idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols) if err != nil { @@ -111,7 +111,7 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin // merge new chunks with preexisting chunks for _, idxMeta := range idxMetas { stat := newTableStats[idxMeta.qual] - targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Histogram) + targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist) if err != nil { return err } @@ -120,7 +120,7 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin continue } stat.Chunks = idxMeta.allAddrs - stat.Histogram = targetChunks + stat.Hist = targetChunks stat.UpdateActive() if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil { return err @@ -176,7 +176,7 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, return indexMeta{}, err } else if cnt == 0 { return indexMeta{ - qual: curStats.Qual, + qual: curStats.Statistic.Qual, cols: cols, }, nil } @@ -188,7 +188,7 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, } var addrs []hash.Hash - var keepChunks []DoltBucket + var keepChunks []sql.HistogramBucket var missingAddrs float64 var missingChunks []tree.Node var missingOffsets []updateOrdinal @@ -210,27 +210,27 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)}) missingAddrs++ } else { - keepChunks = append(keepChunks, curStats.Histogram[bucketIdx]) + keepChunks = append(keepChunks, curStats.Hist[bucketIdx]) } offset += uint64(treeCnt) } - var dropChunks []DoltBucket + var dropChunks []sql.HistogramBucket for _, h := range curStats.Chunks { var match bool for _, b := range keepChunks { - if b.Chunk == h 
{ + if DoltBucketChunk(b) == h { match = true break } } if !match { - dropChunks = append(dropChunks, curStats.Histogram[curStats.Active[h]]) + dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]]) } } return indexMeta{ - qual: curStats.Qual, + qual: curStats.Statistic.Qual, cols: cols, newNodes: missingChunks, updateOrdinals: missingOffsets, diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go index 89f7b6ad6c..6443afe928 100644 --- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go +++ b/go/libraries/doltcore/sqle/statspro/auto_refresh.go @@ -168,18 +168,18 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br curStat, ok := statDb.GetStat(branch, qual) if !ok { curStat = NewDoltStats() - curStat.Qual = qual + curStat.Statistic.Qual = qual cols := make([]string, len(index.Expressions())) tablePrefix := fmt.Sprintf("%s.", table) for i, c := range index.Expressions() { cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) } - curStat.Columns = cols + curStat.Statistic.Cols = cols } ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String()) - updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns) + updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns()) if err != nil { ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) continue @@ -215,7 +215,7 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok { err = statDb.SetStat(ctx, branch, updateMeta.qual, stat) } else { - err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Histogram) + err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist) } if err != nil { return err diff --git a/go/libraries/doltcore/sqle/statspro/dolt_stats.go 
b/go/libraries/doltcore/sqle/statspro/dolt_stats.go index c20689cc33..8f75b9229e 100644 --- a/go/libraries/doltcore/sqle/statspro/dolt_stats.go +++ b/go/libraries/doltcore/sqle/statspro/dolt_stats.go @@ -26,31 +26,142 @@ import ( ) type DoltStats struct { - mu *sync.Mutex + Statistic *stats.Statistic + mu *sync.Mutex // Chunks is a list of addresses for the histogram fanout level Chunks []hash.Hash // Active maps a chunk/bucket address to its position in // the histogram. 1-indexed to differentiate from an empty // field on disk Active map[hash.Hash]int + Hist sql.Histogram +} - RowCount uint64 - DistinctCount uint64 - NullCount uint64 - AvgSize uint64 - Qual sql.StatQualifier - CreatedAt time.Time - Histogram DoltHistogram - Columns []string - Types []sql.Type - IdxClass uint8 - LowerBound sql.Row - Fds *sql.FuncDepSet - ColSet sql.ColSet +var _ sql.Statistic = (*DoltStats)(nil) + +func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic { + ret := *s + ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic { + ret := *s + ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic { + ret := *s + ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) WithRowCount(u uint64) sql.Statistic { + ret := *s + ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) WithNullCount(u uint64) sql.Statistic { + ret := *s + ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic { + ret := *s + ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic { + ret := *s + ret.Statistic = 
ret.Statistic.WithLowerBound(row).(*stats.Statistic) + return &ret +} + +func (s *DoltStats) RowCount() uint64 { + return s.Statistic.RowCount() +} + +func (s *DoltStats) DistinctCount() uint64 { + return s.Statistic.DistinctCount() +} + +func (s *DoltStats) NullCount() uint64 { + return s.Statistic.NullCount() + +} + +func (s *DoltStats) AvgSize() uint64 { + return s.Statistic.AvgSize() + +} + +func (s *DoltStats) CreatedAt() time.Time { + return s.Statistic.CreatedAt() + +} + +func (s *DoltStats) Columns() []string { + return s.Statistic.Columns() +} + +func (s *DoltStats) Types() []sql.Type { + return s.Statistic.Types() +} + +func (s *DoltStats) Qualifier() sql.StatQualifier { + return s.Statistic.Qualifier() +} + +func (s *DoltStats) IndexClass() sql.IndexClass { + return s.Statistic.IndexClass() +} + +func (s *DoltStats) FuncDeps() *sql.FuncDepSet { + return s.Statistic.FuncDeps() +} + +func (s *DoltStats) ColSet() sql.ColSet { + return s.Statistic.ColSet() +} + +func (s *DoltStats) LowerBound() sql.Row { + return s.Statistic.LowerBound() } func NewDoltStats() *DoltStats { - return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int)} + return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}} +} + +func (s *DoltStats) ToInterface() interface{} { + ret := s.Statistic.ToInterface().(map[string]interface{}) + + var hist sql.Histogram + for _, b := range s.Hist { + hist = append(hist, b) + } + ret["statistic"].(map[string]interface{})["buckets"] = hist.ToInterface() + return ret +} + +func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) { + ret := *s + ret.Hist = nil + for _, b := range h { + doltB, ok := b.(DoltBucket) + if !ok { + return nil, fmt.Errorf("invalid bucket type: %T", b) + } + ret.Hist = append(ret.Hist, doltB) + } + return &ret, nil +} + +func (s *DoltStats) Histogram() sql.Histogram { + return s.Hist } func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) { @@ -58,22 
+169,15 @@ func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) { if err != nil { return nil, err } - return &DoltStats{ - mu: &sync.Mutex{}, - Qual: stat.Qualifier(), - RowCount: stat.RowCount(), - DistinctCount: stat.DistinctCount(), - NullCount: stat.NullCount(), - AvgSize: stat.AvgSize(), - CreatedAt: stat.CreatedAt(), - Histogram: hist, - Columns: stat.Columns(), - Types: stat.Types(), - IdxClass: uint8(stat.IndexClass()), - LowerBound: stat.LowerBound(), - Fds: stat.FuncDeps(), - ColSet: stat.ColSet(), - }, nil + ret := &DoltStats{ + mu: &sync.Mutex{}, + Hist: hist, + Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()), + Active: make(map[hash.Hash]int), + } + ret.Statistic.Fds = stat.FuncDeps() + ret.Statistic.Colset = stat.ColSet() + return ret, nil } func (s *DoltStats) UpdateActive() { @@ -86,49 +190,26 @@ func (s *DoltStats) UpdateActive() { s.Active = newActive } -func (s *DoltStats) updateCounts() { - s.mu.Lock() - defer s.mu.Unlock() - var newDistinct uint64 - var newRows uint64 - var newNulls uint64 - for _, b := range s.Histogram { - newDistinct += b.DistinctCount - newRows += b.RowCount - newNulls += b.NullCount - } - s.RowCount = newRows - s.DistinctCount = newDistinct - s.NullCount = newNulls -} - -func (s *DoltStats) toSql() sql.Statistic { - s.mu.Lock() - defer s.mu.Unlock() - typStrs := make([]string, len(s.Types)) - for i, typ := range s.Types { - typStrs[i] = typ.String() - } - stat := stats.NewStatistic(s.RowCount, s.DistinctCount, s.NullCount, s.AvgSize, s.CreatedAt, s.Qual, s.Columns, s.Types, s.Histogram.toSql(), sql.IndexClass(s.IdxClass), s.LowerBound) - return stat.WithColSet(s.ColSet).WithFuncDeps(s.Fds) -} - type DoltHistogram []DoltBucket type DoltBucket struct { - Chunk hash.Hash - RowCount uint64 - DistinctCount uint64 - NullCount uint64 - CreatedAt time.Time - 
Mcvs []sql.Row - McvCount []uint64 - BoundCount uint64 - UpperBound sql.Row + *stats.Bucket + Chunk hash.Hash + Created time.Time } -func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (DoltHistogram, error) { - ret := make([]DoltBucket, len(hist)) +func DoltBucketChunk(b sql.HistogramBucket) hash.Hash { + return b.(DoltBucket).Chunk +} + +func DoltBucketCreated(b sql.HistogramBucket) time.Time { + return b.(DoltBucket).Created +} + +var _ sql.HistogramBucket = (*DoltBucket)(nil) + +func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) { + ret := make(sql.Histogram, len(hist)) var err error for i, b := range hist { upperBound := make(sql.Row, len(b.UpperBound())) @@ -149,24 +230,8 @@ func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (DoltHistogram, error } } ret[i] = DoltBucket{ - RowCount: b.RowCount(), - DistinctCount: b.DistinctCount(), - NullCount: b.NullCount(), - Mcvs: mcvs, - McvCount: b.McvCounts(), - BoundCount: b.BoundCount(), - UpperBound: upperBound, + Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs), } } return ret, nil } - -func (s DoltHistogram) toSql() []*stats.Bucket { - ret := make([]*stats.Bucket, len(s)) - for i, b := range s { - upperBound := make([]interface{}, len(b.UpperBound)) - copy(upperBound, b.UpperBound) - ret[i] = stats.NewHistogramBucket(b.RowCount, b.DistinctCount, b.NullCount, b.BoundCount, upperBound, b.McvCount, b.Mcvs) - } - return ret -} diff --git a/go/libraries/doltcore/sqle/statspro/interface.go b/go/libraries/doltcore/sqle/statspro/interface.go index c525c625ca..ae56b834b1 100644 --- a/go/libraries/doltcore/sqle/statspro/interface.go +++ b/go/libraries/doltcore/sqle/statspro/interface.go @@ -45,7 +45,7 @@ type Database interface { DeleteStats(branch string, quals ...sql.StatQualifier) // ReplaceChunks is an update interface that lets a stats implementation // decide how to edit stats for a stats 
refresh. - ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []DoltBucket) error + ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error // Flush instructs the database to sync any partial state to disk Flush(ctx context.Context, branch string) error // Close finalizes any file references. diff --git a/go/libraries/doltcore/sqle/statspro/stats_provider.go b/go/libraries/doltcore/sqle/statspro/stats_provider.go index 1008a5f4aa..1242f21045 100644 --- a/go/libraries/doltcore/sqle/statspro/stats_provider.go +++ b/go/libraries/doltcore/sqle/statspro/stats_provider.go @@ -38,8 +38,8 @@ type indexMeta struct { newNodes []tree.Node // updateOrdinals are [start, stop] tuples for each update chunk updateOrdinals []updateOrdinal - keepChunks []DoltBucket - dropChunks []DoltBucket + keepChunks []sql.HistogramBucket + dropChunks []sql.HistogramBucket allAddrs []hash.Hash } @@ -160,7 +160,7 @@ func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, table string) for _, qual := range statDb.ListStatQuals(branch) { if strings.EqualFold(db, qual.Database) && strings.EqualFold(table, qual.Tab) { stat, _ := statDb.GetStat(branch, qual) - ret = append(ret, stat.toSql()) + ret = append(ret, stat) } } @@ -224,7 +224,7 @@ func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string if !ok { return nil, false } - return stat.toSql(), true + return stat, true } func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error { @@ -299,7 +299,7 @@ func (p *Provider) RowCount(ctx *sql.Context, db, table string) (uint64, error) return 0, nil } - return priStats.RowCount, nil + return priStats.RowCount(), nil } func (p *Provider) DataLength(ctx *sql.Context, db, table string) (uint64, error) { @@ -322,5 +322,5 @@ func (p *Provider) DataLength(ctx *sql.Context, db, table string) 
(uint64, error return 0, nil } - return priStats.AvgSize, nil + return priStats.AvgSize(), nil } diff --git a/go/libraries/doltcore/sqle/statspro/update.go b/go/libraries/doltcore/sqle/statspro/update.go index 227f25885e..445a436354 100644 --- a/go/libraries/doltcore/sqle/statspro/update.go +++ b/go/libraries/doltcore/sqle/statspro/update.go @@ -82,13 +82,13 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta } else if cnt == 0 { // table is empty ret[meta.qual] = NewDoltStats() - ret[meta.qual].CreatedAt = time.Now() - ret[meta.qual].Columns = meta.cols - ret[meta.qual].Types = types - ret[meta.qual].Qual = meta.qual + ret[meta.qual].Statistic.Created = time.Now() + ret[meta.qual].Statistic.Cols = meta.cols + ret[meta.qual].Statistic.Typs = types + ret[meta.qual].Statistic.Qual = meta.qual - ret[meta.qual].Fds = fds - ret[meta.qual].ColSet = colSet + ret[meta.qual].Statistic.Fds = fds + ret[meta.qual].Statistic.Colset = colSet continue } @@ -100,10 +100,10 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc()) ret[meta.qual] = NewDoltStats() ret[meta.qual].Chunks = meta.allAddrs - ret[meta.qual].CreatedAt = time.Now() - ret[meta.qual].Columns = meta.cols - ret[meta.qual].Types = types - ret[meta.qual].Qual = meta.qual + ret[meta.qual].Statistic.Created = time.Now() + ret[meta.qual].Statistic.Cols = meta.cols + ret[meta.qual].Statistic.Typs = types + ret[meta.qual].Statistic.Qual = meta.qual var start, stop uint64 // read leaf rows for each bucket @@ -140,14 +140,14 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta return nil, err } bucket.Chunk = chunk.HashOf() - ret[updater.qual].Histogram = append(ret[updater.qual].Histogram, bucket) + ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket) } - ret[updater.qual].DistinctCount = uint64(updater.globalDistinct) - ret[updater.qual].RowCount 
= uint64(updater.globalCount) - ret[updater.qual].LowerBound = firstRow - ret[updater.qual].Fds = fds - ret[updater.qual].ColSet = colSet + ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct) + ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount) + ret[updater.qual].Statistic.LowerBnd = firstRow + ret[updater.qual].Statistic.Fds = fds + ret[updater.qual].Statistic.Colset = colSet ret[updater.qual].UpdateActive() } return ret, nil @@ -156,22 +156,22 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta // MergeNewChunks combines a set of old and new chunks to create // the desired target histogram. Undefined behavior if a |targetHash| // does not exist in either |oldChunks| or |newChunks|. -func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []DoltBucket) ([]DoltBucket, error) { +func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) { hashToPos := make(map[hash.Hash]int, len(inputHashes)) for i, h := range inputHashes { hashToPos[h] = i } var cnt int - targetBuckets := make([]DoltBucket, len(inputHashes)) + targetBuckets := make([]sql.HistogramBucket, len(inputHashes)) for _, c := range oldChunks { - if idx, ok := hashToPos[c.Chunk]; ok { + if idx, ok := hashToPos[DoltBucketChunk(c)]; ok { cnt++ targetBuckets[idx] = c } } for _, c := range newChunks { - if idx, ok := hashToPos[c.Chunk]; ok && targetBuckets[idx].Chunk.IsEmpty() { + if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil { cnt++ targetBuckets[idx] = c } @@ -280,13 +280,15 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu } } return DoltBucket{ - RowCount: uint64(u.count), - DistinctCount: uint64(u.distinct), - BoundCount: uint64(u.currentCnt), - Mcvs: mcvRows, - McvCount: u.mcvs.Counts(), - UpperBound: upperBound, - NullCount: uint64(u.nulls), + Bucket: &stats.Bucket{ + RowCnt: uint64(u.count), + 
DistinctCnt: uint64(u.distinct), + BoundCnt: uint64(u.currentCnt), + McvVals: mcvRows, + McvsCnt: u.mcvs.Counts(), + BoundVal: upperBound, + NullCnt: uint64(u.nulls), + }, }, nil } diff --git a/go/libraries/doltcore/sqle/statspro/update_test.go b/go/libraries/doltcore/sqle/statspro/update_test.go index e4b3935473..cfe0a48711 100644 --- a/go/libraries/doltcore/sqle/statspro/update_test.go +++ b/go/libraries/doltcore/sqle/statspro/update_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -66,109 +67,109 @@ func TestBucketBuilder(t *testing.T) { name: "ints", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{ - RowCount: 15, - DistinctCount: 5, - Mcvs: []sql.Row{{int64(4)}, {int64(2)}, {int64(3)}}, - McvCount: []uint64{3, 4, 3}, - UpperBound: sql.Row{int64(5)}, - BoundCount: 2, - }, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 15, + DistinctCnt: 5, + McvVals: []sql.Row{{int64(4)}, {int64(2)}, {int64(3)}}, + McvsCnt: []uint64{3, 4, 3}, + BoundVal: sql.Row{int64(5)}, + BoundCnt: 2, + }}, }, { // technically nulls should be at beginning name: "ints with middle nulls", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{ - RowCount: 16, - DistinctCount: 6, - NullCount: 3, - Mcvs: []sql.Row{{int64(4)}, {int64(2)}, {nil}}, - McvCount: []uint64{3, 4, 3}, - UpperBound: sql.Row{int64(5)}, - BoundCount: 2, - }, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 16, + DistinctCnt: 6, + NullCnt: 3, + McvVals: []sql.Row{{int64(4)}, {int64(2)}, {nil}}, + McvsCnt: []uint64{3, 4, 3}, + BoundVal: sql.Row{int64(5)}, + BoundCnt: 
2, + }}, }, { name: "ints with beginning nulls", keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{ - RowCount: 15, - DistinctCount: 6, - NullCount: 2, - Mcvs: []sql.Row{{int64(3)}, {int64(4)}, {int64(2)}}, - McvCount: []uint64{3, 3, 4}, - UpperBound: sql.Row{int64(5)}, - BoundCount: 2, - }, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 15, + DistinctCnt: 6, + NullCnt: 2, + McvVals: []sql.Row{{int64(3)}, {int64(4)}, {int64(2)}}, + McvsCnt: []uint64{3, 3, 4}, + BoundVal: sql.Row{int64(5)}, + BoundCnt: 2, + }}, }, { name: "more ints", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{ - RowCount: 22, - DistinctCount: 7, - BoundCount: 1, - Mcvs: []sql.Row{{int64(2)}, {int64(6)}, {int64(5)}}, - McvCount: []uint64{4, 4, 4}, - UpperBound: sql.Row{int64(7)}, - }, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 22, + DistinctCnt: 7, + BoundCnt: 1, + McvVals: []sql.Row{{int64(2)}, {int64(6)}, {int64(5)}}, + McvsCnt: []uint64{4, 4, 4}, + BoundVal: sql.Row{int64(7)}, + }}, }, { name: "2-ints", keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{ - RowCount: 15, - DistinctCount: 11, - Mcvs: []sql.Row{{int64(1), int64(1)}, {int64(4), int64(1)}, {int64(2), int64(3)}}, - McvCount: []uint64{2, 3, 2}, - UpperBound: sql.Row{int64(5), int64(2)}, - BoundCount: 1, - }, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 15, + DistinctCnt: 11, + McvVals: []sql.Row{{int64(1), int64(1)}, {int64(4), int64(1)}, {int64(2), 
int64(3)}}, + McvsCnt: []uint64{2, 3, 2}, + BoundVal: sql.Row{int64(5), int64(2)}, + BoundCnt: 1, + }}, }, { name: "2-ints with nulls", keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{ - RowCount: 5, - DistinctCount: 5, - NullCount: 3, - Mcvs: []sql.Row{{int64(2), int64(2)}, {int64(1), nil}, {int64(1), int64(2)}}, - McvCount: []uint64{1, 1, 1}, - UpperBound: sql.Row{int64(2), int64(2)}, - BoundCount: 1, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 5, + DistinctCnt: 5, + NullCnt: 3, + McvVals: []sql.Row{{int64(2), int64(2)}, {int64(1), nil}, {int64(1), int64(2)}}, + McvsCnt: []uint64{1, 1, 1}, + BoundVal: sql.Row{int64(2), int64(2)}, + BoundCnt: 1}, }, }, { name: "varchars", keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}), - bucket: DoltBucket{ - RowCount: 15, - DistinctCount: 9, - Mcvs: []sql.Row{{"i"}, {"h"}, {"g"}}, - McvCount: []uint64{2, 3, 3}, - UpperBound: sql.Row{"i"}, - BoundCount: 2, - }, + bucket: DoltBucket{Bucket: &stats.Bucket{ + RowCnt: 15, + DistinctCnt: 9, + McvVals: []sql.Row{{"i"}, {"h"}, {"g"}}, + McvsCnt: []uint64{2, 3, 3}, + BoundVal: sql.Row{"i"}, + BoundCnt: 2, + }}, }, { name: "varchar-ints", keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{ - RowCount: 15, - DistinctCount: 12, - Mcvs: []sql.Row{{"i", int64(1)}, {"g", int64(2)}, {"h", int64(1)}}, - McvCount: []uint64{2, 2, 2}, - UpperBound: sql.Row{"i", int64(1)}, - BoundCount: 2, - }, + bucket: DoltBucket{Bucket: 
&stats.Bucket{ + RowCnt: 15, + DistinctCnt: 12, + McvVals: []sql.Row{{"i", int64(1)}, {"g", int64(2)}, {"h", int64(1)}}, + McvsCnt: []uint64{2, 2, 2}, + BoundVal: sql.Row{"i", int64(1)}, + BoundCnt: 2, + }}, }, } @@ -190,13 +191,13 @@ func TestBucketBuilder(t *testing.T) { bucket, err := b.finalize(ctx, nil) require.NoError(t, err) - require.Equal(t, int(tt.bucket.RowCount), int(bucket.RowCount)) - require.Equal(t, int(tt.bucket.NullCount), int(bucket.NullCount)) - require.Equal(t, int(tt.bucket.DistinctCount), int(bucket.DistinctCount)) - require.Equal(t, int(tt.bucket.BoundCount), int(bucket.BoundCount)) - require.Equal(t, tt.bucket.UpperBound, bucket.UpperBound) - require.Equal(t, tt.bucket.McvCount, bucket.McvCount) - require.Equal(t, tt.bucket.Mcvs, bucket.Mcvs) + require.Equal(t, int(tt.bucket.RowCount()), int(bucket.RowCount())) + require.Equal(t, int(tt.bucket.NullCount()), int(bucket.NullCount())) + require.Equal(t, int(tt.bucket.DistinctCount()), int(bucket.DistinctCount())) + require.Equal(t, int(tt.bucket.BoundCount()), int(bucket.BoundCount())) + require.Equal(t, tt.bucket.UpperBound(), bucket.UpperBound()) + require.Equal(t, tt.bucket.McvsCnt, bucket.McvsCnt) + require.Equal(t, tt.bucket.Mcvs(), bucket.Mcvs()) }) } } diff --git a/go/store/chunks/chunk_store.go b/go/store/chunks/chunk_store.go index dbc96fd588..16003b38d6 100644 --- a/go/store/chunks/chunk_store.go +++ b/go/store/chunks/chunk_store.go @@ -168,7 +168,7 @@ type ChunkStoreGarbageCollector interface { BeginGC(addChunk func(hash.Hash) bool) error // EndGC indicates that the GC is over. The previously provided - // addChunk function must not be called after this function function. + // addChunk function must not be called after this function. 
EndGC() // MarkAndSweepChunks is expected to read chunk addresses off of diff --git a/go/store/nbs/store.go b/go/store/nbs/store.go index 2daf00337d..c9fd12be55 100644 --- a/go/store/nbs/store.go +++ b/go/store/nbs/store.go @@ -100,7 +100,9 @@ type NomsBlockStore struct { cond *sync.Cond gcInProgress bool - keeperFunc func(hash.Hash) bool + // keeperFunc is set when |gcInProgress| and appends to the GC sweep queue + // or blocks on GC finalize + keeperFunc func(hash.Hash) bool mtSize uint64 putCount uint64