From 13ee830c25513100e0a9a1bf4b9ca92f26748bf9 Mon Sep 17 00:00:00 2001 From: Maximilian Hoffman Date: Tue, 11 Mar 2025 12:24:42 -0700 Subject: [PATCH] [statspro] stats ignore non prolly indexes (#8969) --- go/cmd/dolt/commands/debug.go | 7 +---- go/libraries/doltcore/doltdb/durable/index.go | 11 +++++++ .../doltcore/sqle/enginetest/stats_queries.go | 25 ++++++++++++++++ .../doltcore/sqle/statspro/analyze.go | 30 ++++++++++++------- .../doltcore/sqle/statspro/auto_refresh.go | 5 +++- go/libraries/doltcore/sqle/statspro/update.go | 5 +++- 6 files changed, 64 insertions(+), 19 deletions(-) diff --git a/go/cmd/dolt/commands/debug.go b/go/cmd/dolt/commands/debug.go index 7c79fb9d66..79339b90b7 100644 --- a/go/cmd/dolt/commands/debug.go +++ b/go/cmd/dolt/commands/debug.go @@ -360,10 +360,8 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq eng := sqlEng.GetUnderlyingEngine() eng.Analyzer.Debug = true - eng.Analyzer.Verbose = true defer func() { eng.Analyzer.Debug = false - eng.Analyzer.Verbose = false }() analysisFile, err := os.Create(filepath.Join(tempDir, "analysis.txt")) if err != nil { @@ -378,8 +376,6 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq defer planFile.Close() planBuf := bufio.NewWriter(planFile) - defer planBuf.Flush() - analyzer.SetOutput(analysisFile) logrus.SetOutput(analysisFile) log.SetOutput(analysisFile) @@ -417,7 +413,6 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq planned, err := eng.AnalyzeQuery(ctx, query) if err != nil { - fmt.Fprintf(planBuf, "error: %s\n", err.Error()) return err } @@ -426,7 +421,7 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq fmt.Fprintf(planBuf, "debug plan: \n%s", sql.DebugString(planned)) } - return nil + return planBuf.Flush() } func execDebugMode(ctx *sql.Context, qryist cli.Queryist, queryFile *os.File, continueOnErr bool, format engine.PrintResultFormat) error { diff --git a/go/libraries/doltcore/doltdb/durable/index.go b/go/libraries/doltcore/doltdb/durable/index.go index d6f2ecef74..db5385f135 100644 --- a/go/libraries/doltcore/doltdb/durable/index.go +++ b/go/libraries/doltcore/doltdb/durable/index.go @@ -277,6 +277,17 @@ func ProllyMapFromIndex(i Index) prolly.Map { return i.(prollyIndex).index } +// xxx: don't use this, temporary fix waiting for bigger +// fix in stats 2.0 +func MaybeProllyMapFromIndex(i Index) (prolly.Map, bool) { + ret, ok := i.(prollyIndex) + if ok { + return ret.index, true + } else { + return prolly.Map{}, false + } +} + // MapFromIndex unwraps the Index and returns the underlying map as an interface. func MapFromIndex(i Index) prolly.MapInterfaceWithMutable { switch indexType := i.(type) { diff --git a/go/libraries/doltcore/sqle/enginetest/stats_queries.go b/go/libraries/doltcore/sqle/enginetest/stats_queries.go index fedb7297d5..a5616e37f8 100644 --- a/go/libraries/doltcore/sqle/enginetest/stats_queries.go +++ b/go/libraries/doltcore/sqle/enginetest/stats_queries.go @@ -331,6 +331,31 @@ var DoltStatsStorageTests = []queries.ScriptTest{ }, }, }, + { + Name: "issue 8964: alternative indexes panic", + SetUpScript: []string{ + "create table geom_tbl(g geometry not null srid 0)", + "insert into geom_tbl values (point(0,0)), (linestring(point(1,1), point(2,2)))", + "alter table geom_tbl add spatial index (g)", + "CREATE TABLE fullt_tbl (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), FULLTEXT idx (v1, v2));", + "INSERT INTO fullt_tbl VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');", + "create table vector_tbl (id int primary key, v json);", + `insert into vector_tbl values (1, '[4.0,3.0]'), (2, '[0.0,0.0]'), (3, '[-1.0,1.0]'), (4, '[0.0,-2.0]');`, + `create vector index v_idx on vector_tbl(v);`, + "create table gen_tbl (a int primary key, b int as (a + 1) stored)", + "insert into gen_tbl (a) values (0), (1), (2)", + "create index i1 on gen_tbl(b)", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "analyze table geom_tbl, fullt_tbl, vector_tbl, gen_tbl", + }, + { + Query: "select table_name, index_name from dolt_statistics", + Expected: []sql.Row{{"fullt_tbl", "primary"}, {"gen_tbl", "primary"}, {"gen_tbl", "i1"}, {"vector_tbl", "primary"}}, + }, + }, + }, { Name: "comma encoding bug", SetUpScript: []string{ diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go index 6fd822e75a..45efb4ab05 100644 --- a/go/libraries/doltcore/sqle/statspro/analyze.go +++ b/go/libraries/doltcore/sqle/statspro/analyze.go @@ -179,11 +179,13 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table curStat = NewDoltStats() curStat.Statistic.Qual = qual } - idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols) + idxMeta, ok, err := newIdxMeta(ctx, curStat, dTab, idx, cols) if err != nil { return err } - idxMetas = append(idxMetas, idxMeta) + if ok { + idxMetas = append(idxMetas, idxMeta) + } } newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas) @@ -193,7 +195,10 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table // merge new chunks with preexisting chunks for _, idxMeta := range idxMetas { - stat := newTableStats[idxMeta.qual] + stat, ok := newTableStats[idxMeta.qual] + if !ok { + continue + } targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist) if err != nil { return err @@ -260,7 +265,7 @@ func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql return sqlTable, dTab, nil } -func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) { +func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, bool, error) { var idx durable.Index var err error if strings.EqualFold(sqlIndex.ID(), "PRIMARY") { @@ -269,24 +274,27 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID()) } if err != nil { - return indexMeta{}, err + return indexMeta{}, false, err } - prollyMap := durable.ProllyMapFromIndex(idx) + prollyMap, ok := durable.MaybeProllyMapFromIndex(idx) + if !ok { + return indexMeta{}, false, nil + } if cnt, err := prollyMap.Count(); err != nil { - return indexMeta{}, err + return indexMeta{}, false, err } else if cnt == 0 { return indexMeta{ qual: curStats.Statistic.Qual, cols: cols, - }, nil + }, true, nil } // get newest histogram target level hashes levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt) if err != nil { - return indexMeta{}, err + return indexMeta{}, false, err } var addrs []hash.Hash @@ -303,7 +311,7 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, // track the (start, end) ordinal offsets to simplify the read iter. treeCnt, err := n.TreeCount() if err != nil { - return indexMeta{}, err + return indexMeta{}, false, err } addrs = append(addrs, n.HashOf()) @@ -339,5 +347,5 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, keepChunks: keepChunks, dropChunks: dropChunks, allAddrs: addrs, - }, nil + }, true, nil } diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go index f08e5315e1..df4ed85285 100644 --- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go +++ b/go/libraries/doltcore/sqle/statspro/auto_refresh.go @@ -228,11 +228,14 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br } ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String()) - updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns()) + updateMeta, ok, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns()) if err != nil { ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) continue } + if !ok { + continue + } curCnt := float64(len(curStat.Active)) updateCnt := float64(len(updateMeta.newNodes)) deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks)) diff --git a/go/libraries/doltcore/sqle/statspro/update.go b/go/libraries/doltcore/sqle/statspro/update.go index 36efbad583..225d79b6bb 100644 --- a/go/libraries/doltcore/sqle/statspro/update.go +++ b/go/libraries/doltcore/sqle/statspro/update.go @@ -53,6 +53,10 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta ret := make(map[sql.StatQualifier]*DoltStats) for _, meta := range idxMetas { + sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())] + if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() { + continue + } var idx durable.Index var err error if strings.EqualFold(meta.qual.Index(), "PRIMARY") { @@ -67,7 +71,6 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta prollyMap := durable.ProllyMapFromIndex(idx) keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc()) - sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())] fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx) if err != nil { return nil, err