[statspro] stats ignore non prolly indexes (#8969)

This commit is contained in:
Maximilian Hoffman
2025-03-11 12:24:42 -07:00
committed by GitHub
parent 58dabb6adc
commit 13ee830c25
6 changed files with 64 additions and 19 deletions

View File

@@ -360,10 +360,8 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq
eng := sqlEng.GetUnderlyingEngine()
eng.Analyzer.Debug = true
eng.Analyzer.Verbose = true
defer func() {
eng.Analyzer.Debug = false
eng.Analyzer.Verbose = false
}()
analysisFile, err := os.Create(filepath.Join(tempDir, "analysis.txt"))
if err != nil {
@@ -378,8 +376,6 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq
defer planFile.Close()
planBuf := bufio.NewWriter(planFile)
defer planBuf.Flush()
analyzer.SetOutput(analysisFile)
logrus.SetOutput(analysisFile)
log.SetOutput(analysisFile)
@@ -417,7 +413,6 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq
planned, err := eng.AnalyzeQuery(ctx, query)
if err != nil {
fmt.Fprintf(planBuf, "error: %s\n", err.Error())
return err
}
@@ -426,7 +421,7 @@ func debugAnalyze(ctx *sql.Context, tempDir string, sqlEng *engine.SqlEngine, sq
fmt.Fprintf(planBuf, "debug plan: \n%s", sql.DebugString(planned))
}
return nil
return planBuf.Flush()
}
func execDebugMode(ctx *sql.Context, qryist cli.Queryist, queryFile *os.File, continueOnErr bool, format engine.PrintResultFormat) error {

View File

@@ -277,6 +277,17 @@ func ProllyMapFromIndex(i Index) prolly.Map {
return i.(prollyIndex).index
}
// MaybeProllyMapFromIndex attempts to unwrap the Index as a prollyIndex and
// returns its underlying prolly.Map. The boolean result reports whether the
// unwrap succeeded; when it fails, the zero-value prolly.Map is returned.
//
// Deprecated: temporary workaround until the larger stats 2.0 fix lands —
// avoid adding new callers.
func MaybeProllyMapFromIndex(i Index) (prolly.Map, bool) {
	// A failed type assertion leaves ret as the zero prollyIndex, whose
	// index field is already the zero prolly.Map, so both results can be
	// returned directly without branching (avoids else-after-return).
	ret, ok := i.(prollyIndex)
	return ret.index, ok
}
// MapFromIndex unwraps the Index and returns the underlying map as an interface.
func MapFromIndex(i Index) prolly.MapInterfaceWithMutable {
switch indexType := i.(type) {

View File

@@ -331,6 +331,31 @@ var DoltStatsStorageTests = []queries.ScriptTest{
},
},
},
{
Name: "issue 8964: alternative indexes panic",
SetUpScript: []string{
"create table geom_tbl(g geometry not null srid 0)",
"insert into geom_tbl values (point(0,0)), (linestring(point(1,1), point(2,2)))",
"alter table geom_tbl add spatial index (g)",
"CREATE TABLE fullt_tbl (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), FULLTEXT idx (v1, v2));",
"INSERT INTO fullt_tbl VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
"create table vector_tbl (id int primary key, v json);",
`insert into vector_tbl values (1, '[4.0,3.0]'), (2, '[0.0,0.0]'), (3, '[-1.0,1.0]'), (4, '[0.0,-2.0]');`,
`create vector index v_idx on vector_tbl(v);`,
"create table gen_tbl (a int primary key, b int as (a + 1) stored)",
"insert into gen_tbl (a) values (0), (1), (2)",
"create index i1 on gen_tbl(b)",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "analyze table geom_tbl, fullt_tbl, vector_tbl, gen_tbl",
},
{
Query: "select table_name, index_name from dolt_statistics",
Expected: []sql.Row{{"fullt_tbl", "primary"}, {"gen_tbl", "primary"}, {"gen_tbl", "i1"}, {"vector_tbl", "primary"}},
},
},
},
{
Name: "comma encoding bug",
SetUpScript: []string{

View File

@@ -179,11 +179,13 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table
curStat = NewDoltStats()
curStat.Statistic.Qual = qual
}
idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
idxMeta, ok, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
if err != nil {
return err
}
idxMetas = append(idxMetas, idxMeta)
if ok {
idxMetas = append(idxMetas, idxMeta)
}
}
newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
@@ -193,7 +195,10 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table
// merge new chunks with preexisting chunks
for _, idxMeta := range idxMetas {
stat := newTableStats[idxMeta.qual]
stat, ok := newTableStats[idxMeta.qual]
if !ok {
continue
}
targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
if err != nil {
return err
@@ -260,7 +265,7 @@ func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql
return sqlTable, dTab, nil
}
func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) {
func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, bool, error) {
var idx durable.Index
var err error
if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
@@ -269,24 +274,27 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table,
idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
}
if err != nil {
return indexMeta{}, err
return indexMeta{}, false, err
}
prollyMap := durable.ProllyMapFromIndex(idx)
prollyMap, ok := durable.MaybeProllyMapFromIndex(idx)
if !ok {
return indexMeta{}, false, nil
}
if cnt, err := prollyMap.Count(); err != nil {
return indexMeta{}, err
return indexMeta{}, false, err
} else if cnt == 0 {
return indexMeta{
qual: curStats.Statistic.Qual,
cols: cols,
}, nil
}, true, nil
}
// get newest histogram target level hashes
levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
if err != nil {
return indexMeta{}, err
return indexMeta{}, false, err
}
var addrs []hash.Hash
@@ -303,7 +311,7 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table,
// track the (start, end) ordinal offsets to simplify the read iter.
treeCnt, err := n.TreeCount()
if err != nil {
return indexMeta{}, err
return indexMeta{}, false, err
}
addrs = append(addrs, n.HashOf())
@@ -339,5 +347,5 @@ func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table,
keepChunks: keepChunks,
dropChunks: dropChunks,
allAddrs: addrs,
}, nil
}, true, nil
}

View File

@@ -228,11 +228,14 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br
}
ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())
updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
updateMeta, ok, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
if err != nil {
ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
continue
}
if !ok {
continue
}
curCnt := float64(len(curStat.Active))
updateCnt := float64(len(updateMeta.newNodes))
deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks))

View File

@@ -53,6 +53,10 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
ret := make(map[sql.StatQualifier]*DoltStats)
for _, meta := range idxMetas {
sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())]
if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() {
continue
}
var idx durable.Index
var err error
if strings.EqualFold(meta.qual.Index(), "PRIMARY") {
@@ -67,7 +71,6 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
prollyMap := durable.ProllyMapFromIndex(idx)
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())]
fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx)
if err != nil {
return nil, err