[stats] stats table name sensitivity tests (#8684)

* table name sensitivity tests

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* fix bats

* build issue

* disable stats collection for slow bats test

* use dsess.SqlDatabase for branchdb

* see if disabling bootstrap makes a difference for timeout test

* no lambda bats for slow diff query

* undo lambda change

* nick comments

---------

Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>
This commit is contained in:
Maximilian Hoffman
2024-12-23 15:34:09 -08:00
committed by GitHub
parent 049dea8be1
commit d98baafd3e
12 changed files with 198 additions and 65 deletions

View File

@@ -1663,9 +1663,9 @@ func TestStatsHistograms(t *testing.T) {
// TestStatsStorage forces a provider reload in between setup and assertions,
// which forces a round trip of the statistics table before inspecting values.
func TestStatsIO(t *testing.T) {
func TestStatsStorage(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunStatsIOTests(t, h)
RunStatsStorageTests(t, h)
}
func TestStatsIOWithoutReload(t *testing.T) {

View File

@@ -1553,8 +1553,8 @@ func RunStatsHistogramTests(t *testing.T, h DoltEnginetestHarness) {
}
}
func RunStatsIOTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsIOTests, DoltHistogramTests...) {
func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
@@ -1569,7 +1569,7 @@ func RunStatsIOTests(t *testing.T, h DoltEnginetestHarness) {
}
func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsIOTests, DoltHistogramTests...) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()

View File

@@ -295,7 +295,7 @@ var DoltHistogramTests = []queries.ScriptTest{
},
}
var DoltStatsIOTests = []queries.ScriptTest{
var DoltStatsStorageTests = []queries.ScriptTest{
{
Name: "single-table",
SetUpScript: []string{
@@ -569,6 +569,73 @@ var DoltStatsIOTests = []queries.ScriptTest{
},
},
},
{
Name: "differentiate table cases",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table XY (x bigint primary key, y varchar(16))",
"insert into XY values (0,'0'), (1,'1'), (2,'2')",
"analyze table XY",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "deleted table loads OK",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"CREATE table uv (u bigint primary key, v varchar(16))",
"insert into uv values (0,'0'), (1,'1'), (2,'2')",
"analyze table uv",
"drop table uv",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "differentiate branch names",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main,feat'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"call dolt_checkout('-b', 'feat')",
"CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
"insert into xy values (3,'3',3)",
"analyze table xy",
"call dolt_checkout('main')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
{
Query: "call dolt_checkout('feat')",
},
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "3"}},
},
},
},
{
Name: "drop primary key",
SetUpScript: []string{
@@ -963,11 +1030,15 @@ func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine,
t.Errorf("expected *gms.Engine but found: %T", e)
}
branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
brCopy := make([]string, len(branches))
copy(brCopy, branches)
err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
require.NoError(t, err)
err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", "main")
require.NoError(t, err)
for _, branch := range brCopy {
err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
require.NoError(t, err)
}
}
for _, assertion := range assertions {

View File

@@ -141,7 +141,20 @@ func (n *NomsStatsDatabase) Branches() []string {
}
func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
if ok, err := n.SchemaChange(ctx, branch); err != nil {
branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)
dSess := dsess.DSessFromSess(ctx.Session)
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
if err != nil {
ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
return nil
}
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
if !ok {
return fmt.Errorf("branch/database not found: %s", branchQDbName)
}
if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
return err
} else if ok {
ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
@@ -164,7 +177,7 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
return n.trackBranch(ctx, branch)
}
doltStats, err := loadStats(ctx, n.sourceDb, statsMap)
doltStats, err := loadStats(ctx, branchQDb, statsMap)
if err != nil {
return err
}
@@ -176,12 +189,12 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
return nil
}
func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool, error) {
root, err := n.sourceDb.GetRoot(ctx)
func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
root, err := branchQDb.GetRoot(ctx)
if err != nil {
return false, err
}
tables, err := n.sourceDb.GetTableNames(ctx)
tables, err := branchQDb.GetTableNames(ctx)
if err != nil {
return false, err
}
@@ -201,7 +214,7 @@ func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool,
return false, err
}
keys = append(keys, branch+"/"+tableName)
keys = append(keys, n.schemaTupleKey(branch, tableName))
schHashes = append(schHashes, curHash)
}
@@ -217,8 +230,6 @@ func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string) (bool,
schemaChange = true
break
}
} else if err != nil {
return false, err
}
}
if schemaChange {
@@ -438,7 +449,7 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName
if strings.EqualFold(branch, b) {
return n.schemaHashes[i][tableName], nil
}
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, branch+"/"+tableName); ok {
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
if err != nil {
return hash.Hash{}, err
}
@@ -453,6 +464,10 @@ func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName
return hash.Hash{}, nil
}
// schemaTupleKey builds the tuple key used to store and look up a table's
// schema hash. The key has the form "<source database name>/<branch>/<table>".
func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
	parts := []string{n.sourceDb.Name(), branch, tableName}
	return strings.Join(parts, "/")
}
func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
n.mu.Lock()
defer n.mu.Unlock()
@@ -471,7 +486,7 @@ func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName
}
n.schemaHashes[branchIdx][tableName] = h
key := branch + "/" + tableName
key := n.schemaTupleKey(branch, tableName)
if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
return err
}

View File

@@ -45,6 +45,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
return nil, err
}
currentStat := statspro.NewDoltStats()
invalidTables := make(map[string]bool)
for {
row, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
@@ -74,27 +75,31 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
}
qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
if _, ok := invalidTables[tableName]; ok {
continue
}
if currentStat.Statistic.Qual.String() != qual.String() {
if !currentStat.Statistic.Qual.Empty() {
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
currentStat.Statistic.Fds = fds
currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}
currentStat = statspro.NewDoltStats()
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
if ok {
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
} else if !ok {
ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
invalidTables[qual.Table()] = true
continue
} else if err != nil {
return nil, err
}
}
@@ -168,18 +173,10 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
currentStat.Statistic.Created = createdAt
}
}
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
if !currentStat.Qualifier().Empty() {
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}
fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
if err != nil {
return nil, err
}
currentStat.Statistic.Fds = fds
currentStat.Statistic.Colset = colSet
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
return qualToStats, nil
}
@@ -195,14 +192,44 @@ func parseTypeStrings(typs []string) ([]sql.Type, error) {
return ret, nil
}
func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, error) {
func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
root, err := db.GetRoot(ctx)
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: qual.Table()})
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
iat, ok := sqlTable.(sql.IndexAddressable)
if !ok {
return nil, nil, sql.ErrTableNotFound.New(qual.Table())
return nil, nil, nil, sql.ColSet{}, nil
}
indexes, err := iat.GetIndexes(ctx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
var sqlIdx sql.Index
for _, i := range indexes {
if strings.EqualFold(i.ID(), qual.Index()) {
sqlIdx = i
break
}
}
if sqlIdx == nil {
return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
}
fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
if !ok {
return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
}
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
var idx durable.Index
@@ -212,7 +239,7 @@ func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifi
idx, err = table.GetIndexRowData(ctx, qual.Index())
}
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
prollyMap := durable.ProllyMapFromIndex(idx)
@@ -220,17 +247,17 @@ func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifi
buffPool := prollyMap.NodeStore().Pool()
if cnt, err := prollyMap.Count(); err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
} else if cnt == 0 {
return nil, keyBuilder, nil
return nil, keyBuilder, nil, sql.ColSet{}, nil
}
firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
keyBytes, _, err := firstIter.Next(ctx)
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
@@ -241,10 +268,10 @@ func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifi
for i := 0; i < keyBuilder.Desc.Count(); i++ {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, nil, err
return nil, nil, nil, sql.ColSet{}, err
}
}
return firstRow, keyBuilder, nil
return firstRow, keyBuilder, fds, colset, nil
}
func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {

View File

@@ -47,7 +47,7 @@ func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error {
branches := p.getStatsBranches(ctx)
var rows uint64
for _, branch := range branches {
sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch))
sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
if err != nil {
if sql.ErrDatabaseNotFound.Is(err) {
// default branch is not valid
@@ -91,7 +91,7 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table
dSess := dsess.DSessFromSess(ctx.Session)
sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch))
sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
if err != nil {
return err
}
@@ -214,9 +214,9 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table
return statDb.Flush(ctx, branch)
}
// branchQualifiedDatabase returns a branch qualified database. If the database
// BranchQualifiedDatabase returns a branch-qualified database name. If the database
// name is already branch-suffixed, no duplication is applied.
func (p *Provider) branchQualifiedDatabase(db, branch string) string {
func BranchQualifiedDatabase(db, branch string) string {
suffix := fmt.Sprintf("/%s", branch)
if !strings.HasSuffix(db, suffix) {
return fmt.Sprintf("%s%s", db, suffix)

View File

@@ -85,7 +85,7 @@ func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context
if br, ok, err := ddb.HasBranch(ctx, branch); ok {
sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String())
// update WORKING session references
sqlDb, err := dSess.Provider().Database(sqlCtx, p.branchQualifiedDatabase(dbName, branch))
sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch))
if err != nil {
sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
return

View File

@@ -16,6 +16,7 @@ package statspro
import (
"context"
"fmt"
"strings"
"github.com/dolthub/go-mysql-server/sql"
@@ -46,8 +47,20 @@ func NewStatsInitDatabaseHook(
}
statsProv.setStatDb(dbName, statsDb)
} else {
dSess := dsess.DSessFromSess(ctx.Session)
for _, br := range statsDb.Branches() {
if ok, err := statsDb.SchemaChange(ctx, br); err != nil {
branchQDbName := BranchQualifiedDatabase(dbName, br)
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
if err != nil {
ctx.GetLogger().Logger.Errorf("branch not found: %s", br)
continue
}
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
if !ok {
return fmt.Errorf("branch/database not found: %s", branchQDbName)
}
if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil {
return err
} else if ok {
if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil {

View File

@@ -63,7 +63,7 @@ type Database interface {
// SchemaChange returns true if any table schema in the session
// root is incompatible with the latest schema used to create a stored
// set of statistics.
SchemaChange(ctx *sql.Context, branch string) (bool, error)
SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error)
}
// StatsFactory instances construct statistic databases.

View File

@@ -162,6 +162,15 @@ func (p *Provider) ThreadStatus(dbName string) string {
return "no active stats thread"
}
// TrackedBranches returns the branches reported by the statistics database
// registered for dbName, or nil when no statistics database is found.
func (p *Provider) TrackedBranches(dbName string) []string {
	if statDb, found := p.getStatDb(dbName); found {
		return statDb.Branches()
	}
	return nil
}
func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()

View File

@@ -244,7 +244,7 @@ var DoltSystemVariables = []sql.SystemVariable{
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
Default: 120,
Default: 600,
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,

View File

@@ -123,8 +123,6 @@ func cloneTuple(pool pool.BuffPool, tup Tuple) Tuple {
func allocateTuple(pool pool.BuffPool, bufSz ByteSize, fields int) (tup Tuple, offs offsets) {
offSz := offsetsSize(fields)
tup = pool.Get(uint64(bufSz + offSz + countSize))
// todo: this causes panics still
//tup = sql.SingletonBuf.GetFull(int(bufSz + offSz + countSize))
writeFieldCount(tup, fields)
offs = offsets(tup[bufSz : bufSz+offSz])