[stats] Rewrite stat management to use single threaded event loop (#8815)

* [stats] event loop

* more progress

* basic scheduler test working

* analyze

* add/drop hooks

* gc

* delete an alter

* drop index and table

* fix other tests

* branch management

* starter for kv

* gc and refactor maintanance

* fix bucket doubling

* delete log

* better bucket counting

* test for disk round trip

* more prolly stats gc tests

* rotate backing stats db

* progress towards swapping old for new, deleting old code

* fix gc bucket overflow

* test for gc overflow

* org and closers

* save progress update

* finally get first two bats running

* startup bound hash issue

* rewrite GC to be synchronous, fix more bugs

* fix session freshness

* fix branch gc

* cache writes and gc are serialized

* fix gc/branch update dropped hashes

* fix gc race, doubling race, jobs race

* fix more races

* docs

* convert bats to script tests

* more tests, purge/stop

* validate

* docs

* some PR cleanup

* more cleanup

* stash for pull

* fix bucket hash conflicts

* Fix more collection bugs.

* bump, timer proc

* more test fixes

* cache bats changes

* Another deadlock

* delete comment

* fmt

* no read replica stats

* fix plan tests

* branch qualified analyze fix

* [no-release-notes] go: statspro/jobqueue: Create a SerialQueue, which can perform asynchronous work on a worker thread.

* go: statspro/jobqueue: A bit of cleanup, fix a flakey test.

* rewrite with GDQ

* prog

* tests run

* fix info and storage

* outline for gc impl

* fix tests and races

* bump

* better error and panic management

* better start/stop/wait

* Add rate limiting

* gc ticker

* docs

* doc

* test prog

* fix more tests

* finish up listener tests

* add comments

* gc concurrency

* enginetests and statspro tests passing

* simplify listeners

* bats progress

* small edits

* tests progress

* bats are running

* fmt

* build

* edits

* fix interface

* fix build

* stats alternate index types

* fix mem test

* build

* fix more tests

* fmt

* more fmt

* copyright

* license

* fix races

* syntax error

* fix windows path

* nil mcv panic

* fix test races

* bump def job interval to 30ms

* deterministic tests

* more tests

* TEMP COMMIT: valctx plus some other stuff...

* shorter concurrency tests

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* nondeterministic test

* try to make queue tests less racy

* missed one start

* stats granular session locks

* simplify a little

* try to avoid serialq test deadlock

* try to fix flakes

* more races

* bump

* another race

* cleanup

* more cleanup

* revert ctx validation

* most zach comments

* more comments

* more race

* bump

* more race

* bump

* schemas

* skip windows racees

* standardize server config init, use background threads management

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* default stats noop

* threads management improvements

* undo change

* move stats initialization back to engine

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

---------

Co-authored-by: Aaron Son <aaron@dolthub.com>
Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>
This commit is contained in:
Maximilian Hoffman
2025-03-20 15:56:48 -07:00
committed by GitHub
parent 0dd4217f60
commit b8b2ff1c99
94 changed files with 6645 additions and 4742 deletions
-33
View File
@@ -16782,39 +16782,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
= LICENSE 3565fbf999a10a748647f3a2f7ff9f5dfcf1af7502a30f860ef0bf98 =
================================================================================
================================================================================
= gopkg.in/errgo.v2 licensed under: =
Copyright © 2013, Roger Peppe
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of this project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
= LICENSE fdb54eb3c3cf061a91aac42ab8e6578c3c69de803c2becb0d86810a5 =
================================================================================
================================================================================
= gopkg.in/go-jose/go-jose.v2 licensed under: =
+8 -2
View File
@@ -310,8 +310,14 @@ func relateCommitToParentChunks(ctx context.Context, commit hash.Hash, groupings
from, to, err := delta.GetRowData(ctx)
f := durable.ProllyMapFromIndex(from)
t := durable.ProllyMapFromIndex(to)
f, err := durable.ProllyMapFromIndex(from)
if err != nil {
return err
}
t, err := durable.ProllyMapFromIndex(to)
if err != nil {
return err
}
if f.Node().Level() != t.Node().Level() {
continue
+31 -8
View File
@@ -16,7 +16,6 @@ package engine
import (
"context"
"fmt"
"os"
"strconv"
"strings"
@@ -45,7 +44,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/config"
@@ -85,6 +83,7 @@ type SqlEngineConfig struct {
AutoGCController *dsqle.AutoGCController
BinlogReplicaController binlogreplication.BinlogReplicaController
EventSchedulerStatus eventscheduler.SchedulerStatus
StatsController sql.StatsProvider
}
// NewSqlEngine returns a SqlEngine
@@ -201,9 +200,6 @@ func NewSqlEngine(
"authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig),
})
statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider()))
engine.Analyzer.Catalog.StatsProvider = statsPro
if config.AutoGCController != nil {
err = config.AutoGCController.RunBackgroundThread(bThreads, sqlEngine.NewDefaultContext)
if err != nil {
@@ -216,8 +212,15 @@ func NewSqlEngine(
dprocedures.UseSessionAwareSafepointController = true
}
_, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled)
if enabled.(int8) == 1 {
config.StatsController = statspro.NewStatsController(logrus.StandardLogger(), mrEnv.GetEnv(mrEnv.GetFirstDatabase()))
} else {
config.StatsController = statspro.StatsNoop{}
}
engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
sessFactory := doltSessionFactory(pro, statsPro, mrEnv.Config(), bcController, gcSafepointController, config.Autocommit)
sessFactory := doltSessionFactory(pro, config.StatsController, mrEnv.Config(), bcController, gcSafepointController, config.Autocommit)
sqlEngine.provider = pro
sqlEngine.contextFactory = sqlContextFactory
sqlEngine.dsessFactory = sessFactory
@@ -236,8 +239,28 @@ func NewSqlEngine(
// configuring stats depends on sessionBuilder
// sessionBuilder needs ref to statsProv
if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil {
fmt.Fprintln(cli.CliErr, err)
if sc, ok := config.StatsController.(*statspro.StatsController); ok {
_, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly)
sc.SetMemOnly(memOnly.(int8) == 1)
pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, statspro.NewInitDatabaseHook(sc))
pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, statspro.NewDropDatabaseHook(sc))
var sqlDbs []sql.Database
for _, db := range dbs {
sqlDbs = append(sqlDbs, db)
}
err = sc.Init(ctx, pro, sqlEngine.NewDefaultContext, bThreads, sqlDbs)
if err != nil {
return nil, err
}
if _, paused, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsPaused); paused.(int8) == 0 {
if err = sc.Restart(); err != nil {
return nil, err
}
}
}
// Load MySQL Db information
+3 -11
View File
@@ -303,17 +303,9 @@ func ConfigureServices(
var sqlEngine *engine.SqlEngine
InitSqlEngine := &svcs.AnonService{
InitF: func(ctx context.Context) (err error) {
if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil {
// Auto-stats is off by default for every command except
// sql-server. Unless the config specifies a specific
// behavior, enable server stats collection.
sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1)
} else if statsOn != "0" {
// do not bootstrap if auto-stats enabled
} else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil {
// If we've disabled stats collection and config does not
// specify bootstrap behavior, enable bootstrapping.
sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1)
if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsPaused); err != nil {
// unless otherwise specified, run stats writer alongside server
sql.SystemVariables.SetGlobal(dsess.DoltStatsPaused, 0)
}
sqlEngine, err = engine.NewSqlEngine(
ctx,
@@ -15,6 +15,7 @@
package sqlserver
import (
"fmt"
"net/http"
"os"
"path/filepath"
@@ -184,11 +185,6 @@ func TestServerBadArgs(t *testing.T) {
func TestServerGoodParams(t *testing.T) {
ctx := context.Background()
env, err := sqle.CreateEnvWithSeedData()
require.NoError(t, err)
defer func() {
assert.NoError(t, env.DoltDB(ctx).Close())
}()
tests := []servercfg.ServerConfig{
DefaultCommandLineServerConfig(),
@@ -210,6 +206,11 @@ func TestServerGoodParams(t *testing.T) {
for _, test := range tests {
t.Run(servercfg.ConfigInfo(test), func(t *testing.T) {
env, err := sqle.CreateEnvWithSeedData()
require.NoError(t, err)
defer func() {
assert.NoError(t, env.DoltDB(ctx).Close())
}()
sc := svcs.NewController()
go func(config servercfg.ServerConfig, sc *svcs.Controller) {
_, _ = Serve(context.Background(), &Config{
@@ -219,7 +220,7 @@ func TestServerGoodParams(t *testing.T) {
DoltEnv: env,
})
}(test, sc)
err := sc.WaitForStart()
err = sc.WaitForStart()
require.NoError(t, err)
conn, err := dbr.Open("mysql", servercfg.ConnectionString(test, "dbname"), nil)
require.NoError(t, err)
@@ -228,6 +229,7 @@ func TestServerGoodParams(t *testing.T) {
sc.Stop()
err = sc.WaitForStop()
assert.NoError(t, err)
fmt.Println("stop server")
})
}
}
+1 -2
View File
@@ -61,7 +61,7 @@ require (
github.com/creasty/defaults v1.6.0
github.com/dolthub/aws-sdk-go-ini-parser v0.0.0-20250305001723-2821c37f6c12
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d
github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
github.com/esote/minmaxheap v1.0.0
github.com/goccy/go-json v0.10.2
@@ -93,7 +93,6 @@ require (
go.opentelemetry.io/otel/trace v1.32.0
golang.org/x/text v0.22.0
gonum.org/v1/plot v0.11.0
gopkg.in/errgo.v2 v2.1.0
gopkg.in/go-jose/go-jose.v2 v2.6.3
gopkg.in/yaml.v3 v3.0.1
)
+2 -3
View File
@@ -221,8 +221,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20250319212010-451ea8d003fa h1:NFbzJ4wjWRz32nz2EimbrHpRx1Xt6k+IaR8N+j4x62k=
github.com/dolthub/go-icu-regex v0.0.0-20250319212010-451ea8d003fa/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA=
github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d h1:Ra9hv9fvJkSvjihPmtQB4EMGhq9qNp08gUI/mRmF9no=
github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d/go.mod h1:9itIc5jYYDRxmchFmegPaLaqdf4XWYX6nua5HhrajgA=
github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af h1:ozgYo2hKV6uQqLxZTS+QElHTaZ8mMiKOln25jZI1gVc=
github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af/go.mod h1:9itIc5jYYDRxmchFmegPaLaqdf4XWYX6nua5HhrajgA=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE=
@@ -1186,7 +1186,6 @@ gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o=
+9
View File
@@ -386,6 +386,12 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumC
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/dolthub/go-mysql-server v0.19.1-0.20250228215144-f8da474ecd9f h1:lwQH9jVmSiPg1DFMYB9rWyyJTMPMoBpGrYRsOGOD/hA=
github.com/dolthub/go-mysql-server v0.19.1-0.20250228215144-f8da474ecd9f/go.mod h1:JTlrabhq5TJqvlL+J3NKlm0EzTHQQugUAH6yAxWi4Ww=
github.com/dolthub/go-mysql-server v0.19.1-0.20250305230031-14a57e076a0a h1:lemFIUt0NCKIeX7vnU2yKF8UIgc0DT8zIoEUn7oy+60=
github.com/dolthub/go-mysql-server v0.19.1-0.20250305230031-14a57e076a0a/go.mod h1:yr+Vv47/YLOKMgiEY+QxHTlbIVpTuiVtkEZ5l+xruY4=
github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577 h1:SegEguMxToBn045KRHLIUlF2/jR7Y2qD6fF+3tdOfvI=
github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4=
github.com/dolthub/swiss v0.1.0 h1:EaGQct3AqeP/MjASHLiH6i4TAmgbG/c4rA6a1bzCOPc=
github.com/dolthub/swiss v0.1.0/go.mod h1:BeucyB08Vb1G9tumVN3Vp/pyY4AMUnr9p7Rz7wJ7kAQ=
github.com/dolthub/vitess v0.0.0-20241104125316-860772ba6683 h1:2/RJeUfNAXS7mbBnEr9C36htiCJHk5XldDPzhxtEsME=
github.com/dolthub/vitess v0.0.0-20241104125316-860772ba6683/go.mod h1:uBvlRluuL+SbEWTCZ68o0xvsdYZER3CEG/35INdzfJM=
github.com/dolthub/vitess v0.0.0-20241231200706-18992bb25fdc/go.mod h1:1gQZs/byeHLMSul3Lvl3MzioMtOW1je79QYGyi2fd70=
@@ -651,6 +657,8 @@ github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/sts v1.0.588/go.mod h1:
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/tag v1.0.233/go.mod h1:sX14+NSvMjOhNFaMtP2aDy6Bss8PyFXij21gpY6+DAs=
github.com/tencentyun/cos-go-sdk-v5 v0.7.42/go.mod h1:LUFnaqRmGk6pEHOaRmdn2dCZR2j0cSsM5xowWFPTPao=
github.com/thanhpk/randstr v1.0.4/go.mod h1:M/H2P1eNLZzlDwAzpkkkUvoyNNMbzRGhESZuEQk3r0U=
github.com/thepudds/swisstable v0.0.0-20221011152303-9c77dc657777 h1:5u+6YWU2faS+Sr/x8j9yalMpSDUkatNOZWXV3wMUCGQ=
github.com/thepudds/swisstable v0.0.0-20221011152303-9c77dc657777/go.mod h1:4af3KxEsswy6aTzsTcwa8QZUSh4V+80oHdp1QX9uJHA=
github.com/thlib/go-timezone-local v0.0.0-20210907160436-ef149e42d28e/go.mod h1:/Tnicc6m/lsJE0irFMA0LfIwTBo4QP7A8IfyIv4zZKI=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8 h1:ndzgwNDnKIqyCvHTXaCqh9KlOWKvBry6nuXMJmonVsE=
github.com/tombuildsstuff/giovanni v0.15.1/go.mod h1:0TZugJPEtqzPlMpuJHYfXY6Dq2uLPrXf98D2XQSxNbA=
@@ -740,6 +748,7 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/cheggaaa/pb.v1 v1.0.25 h1:Ev7yu1/f6+d+b3pi5vPdRPc6nNtP1umSfcWiEfRqv6I=
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+8 -3
View File
@@ -105,11 +105,16 @@ func diffProllyTrees(ctx context.Context, ch chan DiffStatProgress, keyless bool
var f, t prolly.Map
if from != nil {
f = durable.ProllyMapFromIndex(from)
f, err = durable.ProllyMapFromIndex(from)
if err != nil {
return err
}
}
if to != nil {
t = durable.ProllyMapFromIndex(to)
t, err = durable.ProllyMapFromIndex(to)
if err != nil {
return err
}
}
_, fVD := f.Descriptors()
+6 -6
View File
@@ -2110,8 +2110,8 @@ func (ddb *DoltDB) AddStash(ctx context.Context, head *Commit, stash RootValue,
return err
}
func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Hash) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
func (ddb *DoltDB) SetStatistics(ctx context.Context, branch string, addr hash.Hash) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
if err != nil {
return err
}
@@ -2119,8 +2119,8 @@ func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Ha
return err
}
func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
func (ddb *DoltDB) DropStatisics(ctx context.Context) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
_, err = ddb.db.Delete(ctx, statsDs, "")
if err != nil {
@@ -2132,8 +2132,8 @@ func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error {
var ErrNoStatistics = errors.New("no statistics found")
// GetStatistics returns the value of the singleton ref.StatsRef for this database
func (ddb *DoltDB) GetStatistics(ctx context.Context, branch string) (prolly.Map, error) {
ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
func (ddb *DoltDB) GetStatistics(ctx context.Context) (prolly.Map, error) {
ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
if err != nil {
return prolly.Map{}, err
}
+11 -3
View File
@@ -273,8 +273,13 @@ type prollyIndex struct {
}
// ProllyMapFromIndex unwraps the Index and returns the underlying prolly.Map.
func ProllyMapFromIndex(i Index) prolly.Map {
return i.(prollyIndex).index
func ProllyMapFromIndex(i Index) (prolly.Map, error) {
switch i := i.(type) {
case prollyIndex:
return i.index, nil
default:
return prolly.Map{}, fmt.Errorf("expected prollyIndex, found: %T", i)
}
}
// xxx: don't use this, temporary fix waiting for bigger
@@ -369,7 +374,10 @@ func (i prollyIndex) AddColumnToRows(ctx context.Context, newCol string, newSche
}
// If not, then we have to iterate over this table's rows and update all the offsets for the new column
rowMap := ProllyMapFromIndex(i)
rowMap, err := ProllyMapFromIndex(i)
if err != nil {
return nil, err
}
mutator := rowMap.Mutate()
iter, err := mutator.IterAll(ctx)
@@ -295,7 +295,10 @@ func createRowIterForTable(ctx *sql.Context, t *doltdb.Table, sch schema.Schema)
if err != nil {
return nil, err
}
rows := durable.ProllyMapFromIndex(rowData)
rows, err := durable.ProllyMapFromIndex(rowData)
if err != nil {
return nil, err
}
rowCount, err := rows.Count()
if err != nil {
return nil, err
@@ -145,7 +145,10 @@ func (table *fulltextTable) ApplyToTable(ctx *sql.Context) (*doltdb.Table, error
if err != nil {
return nil, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
keyDesc, valDesc := m.Descriptors()
keyMap, valMap := ordinalMappingsFromSchema(table.SqlSch, table.Sch)
mut := m.Mutate()
@@ -403,7 +403,7 @@ func assertNomsConflicts(t *testing.T, ctx context.Context, tbl *doltdb.Table, e
func mustGetRowValueFromTable(t *testing.T, ctx context.Context, tbl *doltdb.Table, key val.Tuple) val.Tuple {
idx, err := tbl.GetRowData(ctx)
require.NoError(t, err)
m := durable.ProllyMapFromIndex(idx)
m, _ := durable.ProllyMapFromIndex(idx)
var value val.Tuple
err = m.Get(ctx, key, func(_, v val.Tuple) error {
@@ -438,7 +438,7 @@ func assertKeylessRows(t *testing.T, ctx context.Context, tbl *doltdb.Table, exp
func assertKeylessProllyRows(t *testing.T, ctx context.Context, tbl *doltdb.Table, expected []keylessEntry) {
idx, err := tbl.GetRowData(ctx)
require.NoError(t, err)
m := durable.ProllyMapFromIndex(idx)
m, _ := durable.ProllyMapFromIndex(idx)
expectedSet := mustHash128Set(expected...)
@@ -50,7 +50,10 @@ func mergeProllySecondaryIndexes(
return nil, err
}
mergedM := durable.ProllyMapFromIndex(finalRows)
mergedM, err := durable.ProllyMapFromIndex(finalRows)
if err != nil {
return nil, err
}
tryGetIdx := func(sch schema.Schema, iS durable.IndexSet, indexName string) (prolly.Map, bool, error) {
ok := sch.Indexes().Contains(indexName)
@@ -59,7 +62,10 @@ func mergeProllySecondaryIndexes(
if err != nil {
return prolly.Map{}, false, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return prolly.Map{}, false, err
}
return m, true, nil
}
return prolly.Map{}, false, nil
@@ -73,7 +73,10 @@ func mergeProllyTable(
if err != nil {
return nil, nil, err
}
leftRows := durable.ProllyMapFromIndex(lr)
leftRows, err := durable.ProllyMapFromIndex(lr)
if err != nil {
return nil, nil, err
}
valueMerger := newValueMerger(mergedSch, tm.leftSch, tm.rightSch, tm.ancSch, leftRows.Pool(), tm.ns)
if !valueMerger.leftMapping.IsIdentityMapping() {
@@ -130,7 +133,11 @@ func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Sch
if err != nil {
return nil, nil, err
}
leftEditor := durable.ProllyMapFromIndex(lr).Rewriter(finalSch.GetKeyDescriptor(ns), finalSch.GetValueDescriptor(ns))
lIdx, err := durable.ProllyMapFromIndex(lr)
if err != nil {
return nil, nil, err
}
leftEditor := lIdx.Rewriter(finalSch.GetKeyDescriptor(ns), finalSch.GetValueDescriptor(ns))
ai, err := mergeTbl.GetArtifacts(ctx)
if err != nil {
@@ -331,19 +338,27 @@ func threeWayDiffer(ctx context.Context, tm *TableMerger, valueMerger *valueMerg
if err != nil {
return nil, err
}
leftRows := durable.ProllyMapFromIndex(lr)
leftRows, err := durable.ProllyMapFromIndex(lr)
if err != nil {
return nil, err
}
rr, err := tm.rightTbl.GetRowData(ctx)
if err != nil {
return nil, err
}
rightRows := durable.ProllyMapFromIndex(rr)
rightRows, err := durable.ProllyMapFromIndex(rr)
if err != nil {
return nil, err
}
ar, err := tm.ancTbl.GetRowData(ctx)
if err != nil {
return nil, err
}
ancRows := durable.ProllyMapFromIndex(ar)
ancRows, err := durable.ProllyMapFromIndex(ar)
if err != nil {
return nil, err
}
return tree.NewThreeWayDiffer(
ctx,
@@ -534,7 +549,10 @@ func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm *
if err != nil {
return uniqValidator{}, err
}
clustered := durable.ProllyMapFromIndex(rows)
clustered, err := durable.ProllyMapFromIndex(rows)
if err != nil {
return uniqValidator{}, err
}
indexes, err := tm.leftTbl.GetIndexSet(ctx)
if err != nil {
@@ -552,7 +570,10 @@ func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm *
if err != nil {
return uniqValidator{}, err
}
secondary := durable.ProllyMapFromIndex(idx)
secondary, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return uniqValidator{}, err
}
u, err := newUniqIndex(ctx, sch, tm.name.Name, def, clustered, secondary)
if err != nil {
+7 -3
View File
@@ -332,14 +332,18 @@ func TestMergeCommits(t *testing.T) {
artifacts := durable.ProllyMapFromArtifactIndex(artIdx)
MustEqualArtifactMap(t, expectedArtifacts, artifacts)
MustEqualProlly(t, tableName, durable.ProllyMapFromIndex(expectedRows), durable.ProllyMapFromIndex(mergedRows))
idx1, _ := durable.ProllyMapFromIndex(expectedRows)
idx2, _ := durable.ProllyMapFromIndex(mergedRows)
MustEqualProlly(t, tableName, idx1, idx2)
for _, index := range sch.Indexes().AllIndexes() {
mergedIndexRows, err := merged.table.GetIndexRowData(ctx, index.Name())
require.NoError(t, err)
expectedIndexRows, err := expected.GetIndexRowData(ctx, index.Name())
require.NoError(t, err)
MustEqualProlly(t, index.Name(), durable.ProllyMapFromIndex(expectedIndexRows), durable.ProllyMapFromIndex(mergedIndexRows))
idx1, _ := durable.ProllyMapFromIndex(expectedIndexRows)
idx2, _ := durable.ProllyMapFromIndex(mergedIndexRows)
MustEqualProlly(t, index.Name(), idx1, idx2)
}
h, err := merged.table.HashOf()
@@ -635,7 +639,7 @@ func rebuildAllProllyIndexes(ctx *sql.Context, tbl *doltdb.Table) (*doltdb.Table
if err != nil {
return nil, err
}
primary := durable.ProllyMapFromIndex(tableRowData)
primary, _ := durable.ProllyMapFromIndex(tableRowData)
for _, index := range sch.Indexes().AllIndexes() {
rebuiltIndexRowData, err := creation.BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, index, primary)
@@ -35,7 +35,10 @@ func GetMutableSecondaryIdxs(ctx *sql.Context, ourSch, sch schema.Schema, tableN
if err != nil {
return nil, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
mods[i], err = NewMutableSecondaryIdx(ctx, m, ourSch, sch, tableName, index)
if err != nil {
return nil, err
@@ -68,7 +71,10 @@ func GetMutableSecondaryIdxsWithPending(ctx *sql.Context, ns tree.NodeStore, our
if err != nil {
return nil, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
// If the schema has changed, don't reuse the index.
// TODO: This isn't technically required, but correctly handling updating secondary indexes when only some
+16 -4
View File
@@ -361,7 +361,10 @@ func parentFkConstraintViolations(
return nomsParentFkConstraintViolations(ctx, vr, foreignKey, postParent, postChild, preParent.Schema, m, receiver)
}
if preParent.IndexData == nil || postParent.Schema.GetPKCols().Size() == 0 || preParent.Schema.GetPKCols().Size() == 0 {
m := durable.ProllyMapFromIndex(preParentRowData)
m, err := durable.ProllyMapFromIndex(preParentRowData)
if err != nil {
return err
}
return prollyParentPriDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
}
empty, err := preParentRowData.Empty()
@@ -377,7 +380,10 @@ func parentFkConstraintViolations(
} else {
idx = preParent.IndexData
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return err
}
return prollyParentSecDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
}
@@ -396,7 +402,10 @@ func childFkConstraintViolations(
return nomsChildFkConstraintViolations(ctx, vr, foreignKey, postParent, postChild, preChild.Schema, m, receiver)
}
if preChild.IndexData == nil || postChild.Schema.GetPKCols().Size() == 0 || preChild.Schema.GetPKCols().Size() == 0 {
m := durable.ProllyMapFromIndex(preChildRowData)
m, err := durable.ProllyMapFromIndex(preChildRowData)
if err != nil {
return err
}
return prollyChildPriDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
}
empty, err := preChildRowData.Empty()
@@ -412,7 +421,10 @@ func childFkConstraintViolations(
} else {
idx = preChild.IndexData
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return err
}
return prollyChildSecDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
}
@@ -38,19 +38,29 @@ func prollyParentSecDiffFkConstraintViolations(
postParent, postChild *constraintViolationsLoadedTable,
preParentSecIdx prolly.Map,
receiver FKViolationReceiver) error {
postParentRowData := durable.ProllyMapFromIndex(postParent.RowData)
postParentSecIdx := durable.ProllyMapFromIndex(postParent.IndexData)
childSecIdx := durable.ProllyMapFromIndex(postChild.IndexData)
postParentRowData, err := durable.ProllyMapFromIndex(postParent.RowData)
if err != nil {
return err
}
postParentSecIdx, err := durable.ProllyMapFromIndex(postParent.IndexData)
if err != nil {
return err
}
childSecIdx, err := durable.ProllyMapFromIndex(postChild.IndexData)
if err != nil {
return err
}
parentSecKD, _ := postParentSecIdx.Descriptors()
parentPrefixKD := parentSecKD.PrefixDesc(len(foreignKey.TableColumns))
partialKB := val.NewTupleBuilder(parentPrefixKD)
childPriIdx := durable.ProllyMapFromIndex(postChild.RowData)
childPriIdx, err := durable.ProllyMapFromIndex(postChild.RowData)
if err != nil {
return err
}
childPriKD, _ := childPriIdx.Descriptors()
var err error
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
considerAllRowsModified := false
err = prolly.DiffMaps(ctx, preParentSecIdx, postParentSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
@@ -95,20 +105,32 @@ func prollyParentPriDiffFkConstraintViolations(
postParent, postChild *constraintViolationsLoadedTable,
preParentRowData prolly.Map,
receiver FKViolationReceiver) error {
postParentRowData := durable.ProllyMapFromIndex(postParent.RowData)
postParentIndexData := durable.ProllyMapFromIndex(postParent.IndexData)
postParentRowData, err := durable.ProllyMapFromIndex(postParent.RowData)
if err != nil {
return err
}
postParentIndexData, err := durable.ProllyMapFromIndex(postParent.IndexData)
if err != nil {
return err
}
idxDesc, _ := postParentIndexData.Descriptors()
partialDesc := idxDesc.PrefixDesc(len(foreignKey.TableColumns))
partialKB := val.NewTupleBuilder(partialDesc)
childPriIdx := durable.ProllyMapFromIndex(postChild.RowData)
childScndryIdx := durable.ProllyMapFromIndex(postChild.IndexData)
childPriIdx, err := durable.ProllyMapFromIndex(postChild.RowData)
if err != nil {
return err
}
childScndryIdx, err := durable.ProllyMapFromIndex(postChild.IndexData)
if err != nil {
return err
}
primaryKD, _ := childPriIdx.Descriptors()
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
considerAllRowsModified := false
err := prolly.DiffMaps(ctx, preParentRowData, postParentRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
err = prolly.DiffMaps(ctx, preParentRowData, postParentRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
switch diff.Type {
case tree.RemovedDiff, tree.ModifiedDiff:
partialKey, hadNulls := makePartialKey(partialKB, foreignKey.ReferencedTableColumns, postParent.Index, postParent.Schema, val.Tuple(diff.Key), val.Tuple(diff.From), preParentRowData.Pool())
@@ -159,8 +181,14 @@ func prollyChildPriDiffFkConstraintViolations(
postParent, postChild *constraintViolationsLoadedTable,
preChildRowData prolly.Map,
receiver FKViolationReceiver) error {
postChildRowData := durable.ProllyMapFromIndex(postChild.RowData)
parentScndryIdx := durable.ProllyMapFromIndex(postParent.IndexData)
postChildRowData, err := durable.ProllyMapFromIndex(postChild.RowData)
if err != nil {
return err
}
parentScndryIdx, err := durable.ProllyMapFromIndex(postParent.IndexData)
if err != nil {
return err
}
idxDesc, _ := parentScndryIdx.Descriptors()
partialDesc := idxDesc.PrefixDesc(len(foreignKey.TableColumns))
@@ -168,7 +196,7 @@ func prollyChildPriDiffFkConstraintViolations(
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
considerAllRowsModified := false
err := prolly.DiffMaps(ctx, preChildRowData, postChildRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
err = prolly.DiffMaps(ctx, preChildRowData, postChildRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
switch diff.Type {
case tree.AddedDiff, tree.ModifiedDiff:
k, v := val.Tuple(diff.Key), val.Tuple(diff.To)
@@ -207,9 +235,18 @@ func prollyChildSecDiffFkConstraintViolations(
postParent, postChild *constraintViolationsLoadedTable,
preChildSecIdx prolly.Map,
receiver FKViolationReceiver) error {
postChildRowData := durable.ProllyMapFromIndex(postChild.RowData)
postChildSecIdx := durable.ProllyMapFromIndex(postChild.IndexData)
parentSecIdx := durable.ProllyMapFromIndex(postParent.IndexData)
postChildRowData, err := durable.ProllyMapFromIndex(postChild.RowData)
if err != nil {
return err
}
postChildSecIdx, err := durable.ProllyMapFromIndex(postChild.IndexData)
if err != nil {
return err
}
parentSecIdx, err := durable.ProllyMapFromIndex(postParent.IndexData)
if err != nil {
return err
}
parentSecIdxDesc, _ := parentSecIdx.Descriptors()
prefixDesc := parentSecIdxDesc.PrefixDesc(len(foreignKey.TableColumns))
@@ -218,7 +255,7 @@ func prollyChildSecDiffFkConstraintViolations(
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
considerAllRowsModified := false
err := prolly.DiffMaps(ctx, preChildSecIdx, postChildSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
err = prolly.DiffMaps(ctx, preChildSecIdx, postChildSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
switch diff.Type {
case tree.AddedDiff, tree.ModifiedDiff:
k := val.Tuple(diff.Key)
+8 -2
View File
@@ -405,7 +405,10 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb
if err != nil {
return nil, err
}
newParentRows := durable.ProllyMapFromIndex(idx)
newParentRows, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
oldParentSet, err := oldParentTbl.GetIndexSet(ctx)
if err != nil {
@@ -582,7 +585,10 @@ func migrateIndexSet(
if err != nil {
return nil, err
}
newParent := durable.ProllyMapFromIndex(idx)
newParent, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
newIdx, err := migrateIndex(ctx, def.Schema(), oldParent, old, newParent, ns)
if err != nil {
+1 -1
View File
@@ -205,7 +205,7 @@ func Parse(str string) (DoltRef, error) {
}
if prefix := PrefixForType(StatsRefType); strings.HasPrefix(str, prefix) {
return NewStatsRef(str[len(prefix):]), nil
return NewStatsRef(), nil
}
if prefix := PrefixForType(TupleRefType); strings.HasPrefix(str, prefix) {
+4 -2
View File
@@ -20,9 +20,11 @@ type StatsRef struct {
var _ DoltRef = StatsRef{}
const statsBranch = "main"
// NewStatsRef creates a reference to a statistic dataset head.
func NewStatsRef(branch string) StatsRef {
return StatsRef{branch}
func NewStatsRef() StatsRef {
return StatsRef{statsBranch}
}
// GetType will return StatsRefType
@@ -15,7 +15,7 @@
package reliable
import (
"github.com/dolthub/dolt/go/libraries/doltcore/remotestorage/internal/circular"
"github.com/dolthub/dolt/go/libraries/utils/circular"
)
// A reliable.Chan is a type of channel transformer which can be used to build
+9 -15
View File
@@ -24,12 +24,12 @@ import (
const StatsVersion int64 = 1
const (
StatsQualifierColName = "qualifier"
StatsDbColName = "database_name"
StatsTableColName = "table_name"
StatsIndexColName = "index_name"
StatsPositionColName = "position"
StatsBranchName = "branch"
StatsCommitHashColName = "commit_hash"
StatsPrefixLenName = "prefix_len"
StatsRowCountColName = "row_count"
StatsDistinctCountColName = "distinct_count"
StatsNullCountColName = "null_count"
@@ -42,7 +42,7 @@ const (
StatsMcv2ColName = "mcv2"
StatsMcv3ColName = "mcv3"
StatsMcv4ColName = "mcv4"
StatsMcvCountsColName = "mcvCounts"
StatsMcvCountsColName = "mcv_counts"
StatsVersionColName = "version"
)
@@ -52,6 +52,7 @@ const (
StatsIndexTag
StatsPositionTag
StatsVersionTag
StatsPrefixLenTag
StatsCommitHashTag
StatsRowCountTag
StatsDistinctCountTag
@@ -71,9 +72,9 @@ const (
func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
return sql.PrimaryKeySchema{
Schema: sql.Schema{
&sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName},
@@ -88,7 +89,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
&sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName},
},
PkOrdinals: []int{0, 1},
}
}
@@ -96,20 +96,14 @@ var StatsTableDoltSchema = StatsTableDoltSchemaGen()
func StatsTableDoltSchemaGen() Schema {
colColl := NewColCollection(
NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}),
NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}),
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}),
NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false),
NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false),
NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false),
@@ -377,10 +377,16 @@ func (b *binlogProducer) createRowEvents(ctx *sql.Context, tableDeltas []diff.Ta
var fromMap, toMap prolly.Map
if fromRowData != nil {
fromMap = durable.ProllyMapFromIndex(fromRowData)
fromMap, err = durable.ProllyMapFromIndex(fromRowData)
if err != nil {
return nil, err
}
}
if toRowData != nil {
toMap = durable.ProllyMapFromIndex(toRowData)
toMap, err = durable.ProllyMapFromIndex(toRowData)
if err != nil {
return nil, err
}
}
sch, err := tableDelta.ToTable.GetSchema(ctx)
@@ -162,6 +162,10 @@ func (db database) RequestedName() string {
return db.Name()
}
func (db database) AliasedName() string {
return db.Name()
}
type noopRepoStateWriter struct{}
var _ env.RepoStateWriter = noopRepoStateWriter{}
+3
View File
@@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds
if err != nil {
return nil, false, err
}
if branch == "" {
branch = db.Revision()
}
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true
case doltdb.ProceduresTableName:
found = true
@@ -985,7 +985,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua
}
}
db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
// preserve original user case in the case of not found
if sql.ErrDatabaseNotFound.Is(err) {
return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName)
@@ -1526,8 +1526,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b
return "", false, nil
}
// revisionDbForBranch returns a new database that is tied to the branch named by revSpec
func revisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec
func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
static := staticRepoState{
branch: ref.NewBranchRef(revSpec),
RepoStateWriter: srcDb.DbData().Rsw,
@@ -73,7 +73,11 @@ func getProllyRowMaps(ctx *sql.Context, vrw types.ValueReadWriter, ns tree.NodeS
return prolly.Map{}, err
}
return durable.ProllyMapFromIndex(idx), nil
pm, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return prolly.Map{}, err
}
return pm, nil
}
func resolveProllyConflicts(ctx *sql.Context, tbl *doltdb.Table, tblName string, ourSch, sch schema.Schema) (*doltdb.Table, error) {
@@ -94,7 +98,10 @@ func resolveProllyConflicts(ctx *sql.Context, tbl *doltdb.Table, tblName string,
if err != nil {
return nil, err
}
ourMap := durable.ProllyMapFromIndex(ourIdx)
ourMap, err := durable.ProllyMapFromIndex(ourIdx)
if err != nil {
return nil, err
}
mutMap := ourMap.Mutate()
// get mutable secondary indexes
@@ -47,12 +47,15 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{
{Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag},
{Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints},
{Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)},
{Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)},
{Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)},
{Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)},
{Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)},
{Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)},
{Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)},
{Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)},
{Name: "dolt_stats_flush", Schema: statsFuncSchema, Function: statsFunc(statsFlush)},
{Name: "dolt_stats_once", Schema: statsFuncSchema, Function: statsFunc(statsOnce)},
{Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)},
{Name: "dolt_stats_timers", Schema: statsFuncSchema, Function: statsFunc(statsTimers)},
}
// stringSchema returns a non-nullable schema with all columns as LONGTEXT.
@@ -15,14 +15,14 @@
package dprocedures
import (
"context"
"encoding/json"
"fmt"
"strings"
"strconv"
"github.com/dolthub/go-mysql-server/sql"
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
@@ -34,9 +34,16 @@ var statsFuncSchema = []*sql.Column{
},
}
func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
return func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
res, err := fn(ctx)
const OkResult = "Ok"
func statsFunc(fn func(ctx *sql.Context, args ...string) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
return func(ctx *sql.Context, args ...string) (iter sql.RowIter, err error) {
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("stats function unexpectedly panicked: %s", r)
}
}()
res, err := fn(ctx, args...)
if err != nil {
return nil, err
}
@@ -44,124 +51,211 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con
}
}
// AutoRefreshStatsProvider is a sql.StatsProvider that exposes hooks for
// StatsInfo gives a summary of the current coordinator stats.
type StatsInfo struct {
DbCnt int `json:"dbCnt"`
Active bool `json:"active"`
StorageBucketCnt int `json:"storageBucketCnt"`
CachedBucketCnt int `json:"cachedBucketCnt"`
CachedBoundCnt int `json:"cachedBoundCnt"`
CachedTemplateCnt int `json:"cachedTemplateCnt"`
StatCnt int `json:"statCnt"`
GcCnt int `json:"gcCnt,omitempty"`
GenCnt int `json:"genCnt,omitempty"`
Backing string `json:"backing"`
}
// ToJson returns stats info as a json string. Use the |short|
// flag to exclude cycle counters.
func (si StatsInfo) ToJson(short bool) string {
if short {
si.GcCnt = 0
si.GenCnt = 0
}
jsonData, err := json.Marshal(si)
if err != nil {
return ""
}
return string(jsonData)
}
// ExtendedStatsProvider is a sql.StatsProvider that exposes hooks for
// observing and manipulating background database auto refresh threads.
type AutoRefreshStatsProvider interface {
type ExtendedStatsProvider interface {
sql.StatsProvider
CancelRefreshThread(string)
StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error
ThreadStatus(string) string
Prune(ctx *sql.Context) error
// Restart starts a new stats thread, finalizes any active thread
Restart() error
// Stop finalizes stats thread if active
Stop()
// Info returns summary statistics about the current coordinator state
Info(ctx context.Context) (StatsInfo, error)
// Purge wipes the memory and storage state, and pauses stats collection
Purge(ctx *sql.Context) error
// WaitForSync blocks until the stats state includes changes
// from the current session
WaitForSync(ctx context.Context) error
// Gc forces the next stats cycle to perform a GC. Block until
// the GC lands.
Gc(ctx *sql.Context) error
// WaitForFlush blocks until the next cycle finishes and flushes
// buckets to disk.
WaitForFlush(ctx *sql.Context) error
// CollectOnce performs a stats update in-thread. This will contend
// with background collection and most useful in a non-server context.
CollectOnce(ctx context.Context) (string, error)
// SetTimers is an access point for editing the statistics
// delay timer. This will block if the scheduler is not running.
SetTimers(int64, int64)
}
type BranchStatsProvider interface {
DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error
}
// statsRestart tries to stop and then start a refresh thread
func statsRestart(ctx *sql.Context) (interface{}, error) {
// statsRestart cancels any ongoing update thread and starts a new worker
func statsRestart(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider()
dbName := strings.ToLower(ctx.GetCurrentDatabase())
if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
pro := dSess.Provider()
newFs, err := pro.FileSystemForDatabase(dbName)
if err != nil {
return nil, fmt.Errorf("failed to restart stats collection: %w", err)
if afp, ok := statsPro.(ExtendedStatsProvider); ok {
if err := afp.Restart(); err != nil {
return nil, err
}
dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO")
sqlDb, ok := pro.BaseDatabase(ctx, dbName)
if !ok {
return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName)
}
afp.CancelRefreshThread(dbName)
err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb)
if err != nil {
return nil, fmt.Errorf("failed to restart collection: %w", err)
}
return fmt.Sprintf("restarted stats collection: %s", ref.StatsRef{}.String()), nil
return OkResult, nil
}
return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsStatus returns the last update for a stats thread
func statsStatus(ctx *sql.Context) (interface{}, error) {
// statsInfo returns a coordinator state summary
func statsInfo(ctx *sql.Context, args ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
dbName := strings.ToLower(ctx.GetCurrentDatabase())
pro := dSess.StatsProvider()
if afp, ok := pro.(AutoRefreshStatsProvider); ok {
return afp.ThreadStatus(dbName), nil
if afp, ok := pro.(ExtendedStatsProvider); ok {
var short bool
if len(args) > 0 && (args[0] == "-s" || args[0] == "--short") {
short = true
}
info, err := afp.Info(ctx)
if err != nil {
return nil, err
}
return info.ToJson(short), nil
}
return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsStop cancels a refresh thread
func statsStop(ctx *sql.Context) (interface{}, error) {
// statsWait blocks until the stats worker executes two full loops
// of instructions. The second loop will include the most recent
// committed session as of this function's execution.
func statsWait(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro := dSess.StatsProvider()
if afp, ok := pro.(ExtendedStatsProvider); ok {
if err := afp.WaitForSync(ctx); err != nil {
return nil, err
}
return OkResult, nil
}
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsOnce runs a one-off worker update. This is mostly used for
// testing and grabbing statistics while in the shell. Servers
// should use `dolt_stats_wait` to avoid contending with the
// background thread.
func statsOnce(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro := dSess.StatsProvider()
if afp, ok := pro.(ExtendedStatsProvider); ok {
str, err := afp.CollectOnce(ctx)
if err != nil {
return nil, err
}
return str, nil
}
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsFlush waits for the next stats flush to storage.
func statsFlush(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro := dSess.StatsProvider()
if afp, ok := pro.(ExtendedStatsProvider); ok {
if err := afp.WaitForFlush(ctx); err != nil {
return nil, err
}
return OkResult, nil
}
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsGc sets the |doGc| flag and waits until a worker
// performs an update/GC.
func statsGc(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro := dSess.StatsProvider()
if afp, ok := pro.(ExtendedStatsProvider); ok {
if err := afp.Gc(ctx); err != nil {
return nil, err
}
return OkResult, nil
}
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsStop flushes the job queue and leaves the stats provider
// in a paused state.
func statsStop(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider()
dbName := strings.ToLower(ctx.GetCurrentDatabase())
if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
afp.CancelRefreshThread(dbName)
return fmt.Sprintf("stopped thread: %s", dbName), nil
if afp, ok := statsPro.(ExtendedStatsProvider); ok {
afp.Stop()
return OkResult, nil
}
return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
// statsDrop deletes the stats ref
func statsDrop(ctx *sql.Context) (interface{}, error) {
// statsPurge flushes the job queue, deletes the current caches
// and storage targets, re-initializes the tracked database
// states, and returns with stats collection paused.
func statsPurge(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro := dSess.StatsProvider()
dbName := strings.ToLower(ctx.GetCurrentDatabase())
branch, err := dSess.GetBranch()
if err != nil {
return nil, fmt.Errorf("failed to drop stats: %w", err)
}
if afp, ok := pro.(AutoRefreshStatsProvider); ok {
// currently unsafe to drop stats while running refresh
afp.CancelRefreshThread(dbName)
}
if bsp, ok := pro.(BranchStatsProvider); ok {
err := bsp.DropBranchDbStats(ctx, branch, dbName, true)
if err != nil {
return nil, fmt.Errorf("failed to drop stats: %w", err)
}
}
return fmt.Sprintf("deleted stats ref for %s", dbName), nil
}
// statsPrune replaces the current disk contents with only the currently
// tracked in memory statistics.
func statsPrune(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
pro, ok := dSess.StatsProvider().(ExtendedStatsProvider)
if !ok {
return nil, fmt.Errorf("stats not persisted, cannot purge")
}
if err := pro.Prune(ctx); err != nil {
return "failed to prune stats databases", err
}
return "pruned all stats databases", nil
}
// statsPurge removes the stats database from disk
func statsPurge(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
if !ok {
return nil, fmt.Errorf("stats not persisted, cannot purge")
}
pro.Stop()
if err := pro.Purge(ctx); err != nil {
return "failed to purged databases", err
return "failed to purge stats", err
}
return "purged all database stats", nil
return OkResult, nil
}
// statsTimers updates the stats timers, which go into effect immediately.
func statsTimers(ctx *sql.Context, args ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider()
if len(args) != 2 {
return nil, fmt.Errorf("expected timer arguments (ns): (job, gc)")
}
job, err := strconv.ParseInt(args[0], 10, 64)
if err != nil {
return nil, fmt.Errorf("interval timer must be positive intergers")
}
gc, err := strconv.ParseInt(args[1], 10, 64)
if err != nil {
return nil, fmt.Errorf("interval timer must be positive intergers")
}
if afp, ok := statsPro.(ExtendedStatsProvider); ok {
afp.SetTimers(job, gc)
return OkResult, nil
}
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
}
@@ -327,7 +327,10 @@ func (a *AutoIncrementTracker) deepSet(ctx *sql.Context, tableName string, table
func getMaxIndexValue(ctx context.Context, indexData durable.Index) (uint64, error) {
if types.IsFormat_DOLT(indexData.Format()) {
idx := durable.ProllyMapFromIndex(indexData)
idx, err := durable.ProllyMapFromIndex(indexData)
if err != nil {
return 0, err
}
iter, err := idx.IterAllReverse(ctx)
if err != nil {
@@ -122,6 +122,7 @@ type SqlDatabase interface {
sql.Database
sql.SchemaDatabase
sql.DatabaseSchema
sql.AliasedDatabase
SessionDatabase
RevisionDatabase
@@ -59,12 +59,13 @@ const (
DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch"
DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs"
DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled"
DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled"
DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold"
DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval"
DoltStatsMemoryOnly = "dolt_stats_memory_only"
DoltStatsBranches = "dolt_stats_branches"
DoltStatsEnabled = "dolt_stats_enabled"
DoltStatsPaused = "dolt_stats_paused"
DoltStatsMemoryOnly = "dolt_stats_memory_only"
DoltStatsBranches = "dolt_stats_branches"
DoltStatsJobInterval = "dolt_stats_job_interval"
DoltStatsGCInterval = "dolt_stats_gc_interval"
DoltStatsGCEnabled = "dolt_stats_gc_enabled"
)
const URLTemplateDatabasePlaceholder = "{database}"
@@ -154,7 +154,10 @@ func newProllyConflictRowIter(ctx *sql.Context, ct ProllyConflictsTable) (*proll
if err != nil {
return nil, err
}
ourRows := durable.ProllyMapFromIndex(idx)
ourRows, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
itr, err := ct.artM.IterAllConflicts(ctx)
if err != nil {
@@ -424,7 +427,11 @@ func (itr *prollyConflictRowIter) loadTableMaps(ctx *sql.Context, baseHash, thei
return err
}
itr.baseRows = durable.ProllyMapFromIndex(idx)
itr.baseRows, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return err
}
itr.baseHash = baseHash
}
@@ -446,7 +453,10 @@ func (itr *prollyConflictRowIter) loadTableMaps(ctx *sql.Context, baseHash, thei
if err != nil {
return err
}
itr.theirRows = durable.ProllyMapFromIndex(idx)
itr.theirRows, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return err
}
itr.theirHash = theirHash
}
@@ -251,7 +251,10 @@ func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, tar
if err != nil {
return prollyDiffIter{}, err
}
from = durable.ProllyMapFromIndex(idx)
from, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return prollyDiffIter{}, err
}
if fsch, err = dp.from.GetSchema(ctx); err != nil {
return prollyDiffIter{}, err
}
@@ -263,7 +266,10 @@ func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, tar
if err != nil {
return prollyDiffIter{}, err
}
to = durable.ProllyMapFromIndex(idx)
to, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return prollyDiffIter{}, err
}
if tsch, err = dp.to.GetSchema(ctx); err != nil {
return prollyDiffIter{}, err
}
@@ -236,7 +236,10 @@ func newQueryCatalogEntryProlly(ctx context.Context, tbl *doltdb.Table, id, name
if err != nil {
return SavedQuery{}, nil, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return SavedQuery{}, nil, err
}
existingSQ, err := retrieveFromQueryCatalogProlly(ctx, tbl, id)
if err != nil && !ErrQueryNotFound.Is(err) {
@@ -312,7 +315,11 @@ func retrieveFromQueryCatalogProlly(ctx context.Context, tbl *doltdb.Table, id s
return SavedQuery{}, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return SavedQuery{}, err
}
kb := val.NewTupleBuilder(catalogKd)
kb.PutString(0, id)
k := kb.Build(m.Pool())
@@ -68,7 +68,7 @@ func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) {
}
type BranchStatsProvider interface {
GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error)
GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error)
}
// RowCount implements sql.StatisticsTable
@@ -119,14 +119,19 @@ func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) {
// PartitionRows is a sql.Table interface function that gets a row iterator for a partition
func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider().(BranchStatsProvider)
statsPro, ok := dSess.StatsProvider().(BranchStatsProvider)
if !ok {
return sql.RowsToRowIter(), nil
}
var dStats []sql.Statistic
for _, table := range st.tableNames {
dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, st.schemaName, table)
if err != nil {
return nil, err
}
dStats = append(dStats, dbStats...)
for _, s := range dbStats {
dStats = append(dStats, s)
}
}
return stats.NewStatsIter(ctx, dStats...)
}
@@ -825,7 +825,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif
if err != nil {
return workspaceDiffIter{}, err
}
base = durable.ProllyMapFromIndex(idx)
base, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return workspaceDiffIter{}, err
}
}
if wp.staging != nil {
@@ -833,7 +836,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif
if err != nil {
return workspaceDiffIter{}, err
}
staging = durable.ProllyMapFromIndex(idx)
staging, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return workspaceDiffIter{}, err
}
}
if wp.working != nil {
@@ -841,7 +847,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif
if err != nil {
return workspaceDiffIter{}, err
}
working = durable.ProllyMapFromIndex(idx)
working, err = durable.ProllyMapFromIndex(idx)
if err != nil {
return workspaceDiffIter{}, err
}
}
var nodeStore tree.NodeStore
@@ -392,16 +392,12 @@ func TestQueryPlans(t *testing.T) {
}
func TestIntegrationQueryPlans(t *testing.T) {
harness := newDoltEnginetestHarness(t).WithConfigureStats(true)
harness := newDoltEnginetestHarness(t)
defer harness.Close()
enginetest.TestIntegrationPlans(t, harness)
}
func TestDoltDiffQueryPlans(t *testing.T) {
if !types.IsFormat_DOLT(types.Format_Default) {
t.Skip("only new format support system table indexing")
}
harness := newDoltEnginetestHarness(t).WithParallelism(2) // want Exchange nodes
RunDoltDiffQueryPlansTest(t, harness)
}
@@ -608,7 +604,7 @@ func TestScripts(t *testing.T) {
if types.IsFormat_DOLT(types.Format_Default) {
skipped = append(skipped, newFormatSkippedScripts...)
}
h := newDoltHarness(t).WithSkippedQueries(skipped)
h := newDoltHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true)
defer h.Close()
enginetest.TestScripts(t, h)
}
@@ -685,20 +681,13 @@ func TestDoltUserPrivileges(t *testing.T) {
}
func TestJoinOps(t *testing.T) {
if types.IsFormat_LD(types.Format_Default) {
t.Skip("DOLT_LD keyless indexes are not sorted")
}
h := newDoltHarness(t)
defer h.Close()
enginetest.TestJoinOps(t, h, enginetest.DefaultJoinOpTests)
}
func TestJoinPlanning(t *testing.T) {
if types.IsFormat_LD(types.Format_Default) {
t.Skip("DOLT_LD keyless indexes are not sorted")
}
h := newDoltEnginetestHarness(t).WithConfigureStats(true)
h := newDoltEnginetestHarness(t)
defer h.Close()
enginetest.TestJoinPlanning(t, h)
}
@@ -706,7 +695,6 @@ func TestJoinPlanning(t *testing.T) {
func TestJoinQueries(t *testing.T) {
h := newDoltHarness(t)
defer h.Close()
enginetest.TestJoinQueries(t, h)
}
func TestJoinQueriesPrepared(t *testing.T) {
@@ -1458,11 +1446,6 @@ func TestStatBranchTests(t *testing.T) {
RunStatBranchTests(t, harness)
}
func TestStatsFunctions(t *testing.T) {
harness := newDoltEnginetestHarness(t)
RunStatsFunctionsTest(t, harness)
}
func TestDiffTableFunction(t *testing.T) {
harness := newDoltEnginetestHarness(t)
RunDiffTableFunctionTests(t, harness)
@@ -1669,11 +1652,6 @@ func TestStatsStorage(t *testing.T) {
RunStatsStorageTests(t, h)
}
func TestStatsIOWithoutReload(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunStatsIOTestsWithoutReload(t, h)
}
func TestJoinStats(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunJoinStatsTests(t, h)
@@ -1744,7 +1722,7 @@ func TestScriptsPrepared(t *testing.T) {
skipped = append(skipped, newFormatSkippedScripts...)
}
skipPreparedTests(t)
h := newDoltHarness(t).WithSkippedQueries(skipped)
h := newDoltHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true)
defer h.Close()
enginetest.TestScriptsPrepared(t, h)
}
@@ -1945,6 +1923,10 @@ func TestCreateDatabaseErrorCleansUp(t *testing.T) {
// (2) auto refresh threads, and (3) manual ANALYZE statements.
// todo: the dolt_stat functions should be concurrency tested
func TestStatsAutoRefreshConcurrency(t *testing.T) {
if runtime.GOOS == "windows" && os.Getenv("CI") != "" {
t.Skip("Racy on Windows CI.")
}
// create engine
harness := newDoltHarness(t)
harness.Setup(setup.MydbData)
@@ -1959,21 +1941,16 @@ func TestStatsAutoRefreshConcurrency(t *testing.T) {
// Setting an interval of 0 and a threshold of 0 will result
// in the stats being updated after every operation
intervalSec := time.Duration(0)
thresholdf64 := 0.
bThreads := sql.NewBackgroundThreads()
branches := []string{"main"}
statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.Provider)
statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.StatsController)
// it is important to use new sessions for this test, to avoid working root conflicts
readCtx := enginetest.NewSession(harness)
writeCtx := enginetest.NewSession(harness)
refreshCtx := enginetest.NewSession(harness)
newCtx := func(context.Context) (*sql.Context, error) {
return refreshCtx, nil
}
err := statsProv.InitAutoRefreshWithParams(newCtx, sqlDb.Name(), bThreads, intervalSec, thresholdf64, branches)
fs, err := engine.EngineAnalyzer().Catalog.DbProvider.(*sqle.DoltDatabaseProvider).FileSystemForDatabase(sqlDb.AliasedName())
require.NoError(t, err)
err = statsProv.AddFs(readCtx, sqlDb, fs, true)
require.NoError(t, err)
execQ := func(ctx *sql.Context, q string, id int, tag string) {
@@ -234,41 +234,8 @@ func RunVersionedQueriesTest(t *testing.T, h DoltEnginetestHarness) {
}
func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) {
// Dolt supports partial keys, so the index matched is different for some plans
// TODO: Fix these differences by implementing partial key matching in the memory tables, or the engine itself
skipped := []string{
"SELECT pk,pk1,pk2 FROM one_pk LEFT JOIN two_pk ON pk=pk1",
"SELECT pk,pk1,pk2 FROM one_pk JOIN two_pk ON pk=pk1",
"SELECT one_pk.c5,pk1,pk2 FROM one_pk JOIN two_pk ON pk=pk1 ORDER BY 1,2,3",
"SELECT opk.c5,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON opk.pk=tpk.pk1 ORDER BY 1,2,3",
"SELECT opk.c5,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON pk=pk1 ORDER BY 1,2,3",
"SELECT pk,pk1,pk2 FROM one_pk LEFT JOIN two_pk ON pk=pk1 ORDER BY 1,2,3",
"SELECT pk,pk1,pk2 FROM one_pk t1, two_pk t2 WHERE pk=1 AND pk2=1 AND pk1=1 ORDER BY 1,2",
}
// Parallelism introduces Exchange nodes into the query plans, so disable.
// TODO: exchange nodes should really only be part of the explain plan under certain debug settings
harness = harness.NewHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true)
if !types.IsFormat_DOLT(types.Format_Default) {
// only new format supports reverse IndexTableAccess
reverseIndexSkip := []string{
"SELECT * FROM one_pk ORDER BY pk",
"SELECT * FROM two_pk ORDER BY pk1, pk2",
"SELECT * FROM two_pk ORDER BY pk1",
"SELECT pk1 AS one, pk2 AS two FROM two_pk ORDER BY pk1, pk2",
"SELECT pk1 AS one, pk2 AS two FROM two_pk ORDER BY one, two",
"SELECT i FROM (SELECT i FROM mytable ORDER BY i DESC LIMIT 1) sq WHERE i = 3",
"SELECT i FROM (SELECT i FROM (SELECT i FROM mytable ORDER BY DES LIMIT 1) sql1)sql2 WHERE i = 3",
"SELECT s,i FROM mytable order by i DESC",
"SELECT s,i FROM mytable as a order by i DESC",
"SELECT pk1, pk2 FROM two_pk order by pk1 asc, pk2 asc",
"SELECT pk1, pk2 FROM two_pk order by pk1 desc, pk2 desc",
"SELECT i FROM (SELECT i FROM (SELECT i FROM mytable ORDER BY i DESC LIMIT 1) sq1) sq2 WHERE i = 3",
}
harness = harness.WithSkippedQueries(reverseIndexSkip)
}
harness = harness.NewHarness(t)
defer harness.Close()
sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0)
enginetest.TestQueryPlans(t, harness, queries.PlanTests)
}
@@ -1165,21 +1132,6 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) enginetest.QueryEngine {
return e
}
func RunStatsFunctionsTest(t *testing.T, harness DoltEnginetestHarness) {
defer harness.Close()
for _, test := range StatProcTests {
t.Run(test.Name, func(t *testing.T) {
// reset engine so provider statistics are clean
harness = harness.NewHarness(t).WithConfigureStats(true)
harness.Setup(setup.MydbData)
harness.SkipSetupCommit()
e := mustNewEngine(t, harness)
defer e.Close()
enginetest.TestScriptWithEngine(t, e, harness, test)
})
}
}
func RunDiffTableFunctionTests(t *testing.T, harness DoltEnginetestHarness) {
for _, test := range DiffTableFunctionScriptTests {
t.Run(test.Name, func(t *testing.T) {
@@ -1559,30 +1511,15 @@ func RunStatsHistogramTests(t *testing.T, h DoltEnginetestHarness) {
}
func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
for _, script := range DoltHistogramTests {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
e := mustNewEngine(t, h)
if enginetest.IsServerEngine(e) {
return
}
defer e.Close()
TestProviderReloadScriptWithEngine(t, e, h, script)
}()
}
}
func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
e := mustNewEngine(t, h)
if enginetest.IsServerEngine(e) {
return
}
defer e.Close()
enginetest.TestScriptWithEngine(t, e, h, script)
}()
}
@@ -20,6 +20,7 @@ import (
"runtime"
"strings"
"testing"
"time"
gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/enginetest"
@@ -28,6 +29,7 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/mysql_db"
"github.com/dolthub/go-mysql-server/sql/rowexec"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
@@ -36,7 +38,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
@@ -46,7 +47,7 @@ import (
type DoltHarness struct {
t *testing.T
provider dsess.DoltDatabaseProvider
statsPro sql.StatsProvider
statsPro *statspro.StatsController
multiRepoEnv *env.MultiRepoEnv
session *dsess.DoltSession
branchControl *branch_control.Controller
@@ -59,6 +60,7 @@ type DoltHarness struct {
setupDbs map[string]struct{}
skipSetupCommit bool
configureStats bool
statsThreads *sql.BackgroundThreads
useLocalFilesystem bool
setupTestProcedures bool
}
@@ -242,12 +244,19 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
}
doltProvider, ok := pro.(*sqle.DoltDatabaseProvider)
require.True(t, ok)
d.provider = doltProvider
d.gcSafepointController = dsess.NewGCSafepointController()
statsProv := statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
d.statsPro = statsProv
bThreads := sql.NewBackgroundThreads()
ctxGen := func(ctx context.Context) (*sql.Context, error) {
client := sql.Client{Address: "localhost", User: "root"}
return sql.NewContext(context.Background(), sql.WithSession(d.newSessionWithClient(client))), nil
}
statsPro := statspro.NewStatsController(logrus.StandardLogger(), d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase()))
d.statsPro = statsPro
var err error
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, d.gcSafepointController)
@@ -262,6 +271,7 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
sqlCtx := enginetest.NewContext(d)
databases := pro.AllDatabases(sqlCtx)
d.setupDbs = make(map[string]struct{})
var dbs []string
for _, db := range databases {
@@ -281,41 +291,45 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
require.NoError(t, err)
}
e = e.WithBackgroundThreads(bThreads)
// xxx: stats threads can't be tied to single test cycle,
// this is only OK for enginetests
statsThreads := sql.NewBackgroundThreads()
if d.configureStats {
bThreads := sql.NewBackgroundThreads()
e = e.WithBackgroundThreads(bThreads)
dSess := dsess.DSessFromSess(sqlCtx.Session)
dbCache := dSess.DatabaseCache(sqlCtx)
dsessDbs := make([]dsess.SqlDatabase, len(dbs))
for i, dbName := range dbs {
dsessDbs[i], _ = dbCache.GetCachedRevisionDb(fmt.Sprintf("%s/main", dbName), dbName)
err = statsPro.Init(ctx, doltProvider, ctxGen, statsThreads, databases)
if err != nil {
return nil, err
}
statsPro.SetTimers(int64(1*time.Nanosecond), int64(1*time.Second))
ctxFact := func(context.Context) (*sql.Context, error) {
sess := d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"})
return sql.NewContext(context.Background(), sql.WithSession(sess)), nil
}
if err = statsProv.Configure(sqlCtx, ctxFact, bThreads, dsessDbs); err != nil {
err = statsPro.Restart()
if err != nil {
return nil, err
}
statsOnlyQueries := filterStatsOnlyQueries(d.setupData)
e, err = enginetest.RunSetupScripts(sqlCtx, e, statsOnlyQueries, d.SupportsNativeIndexCreation())
if err != nil {
return nil, err
}
finalizeStatsAfterSetup := []setup.SetupScript{{"call dolt_stats_wait()"}}
e, err = enginetest.RunSetupScripts(sqlCtx, d.engine, finalizeStatsAfterSetup, d.SupportsNativeIndexCreation())
require.NoError(t, err)
}
return e, nil
}
// Reset the mysql DB table to a clean state for this new engine
ctx := enginetest.NewContext(d)
d.engine.Analyzer.Catalog.MySQLDb = mysql_db.CreateEmptyMySQLDb()
d.engine.Analyzer.Catalog.MySQLDb.AddRootAccount()
d.engine.Analyzer.Catalog.StatsProvider = statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
var err error
sqlCtx := enginetest.NewContext(d)
e, err := enginetest.RunSetupScripts(sqlCtx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
e, err := enginetest.RunSetupScripts(ctx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
require.NoError(t, err)
// Get a fresh session after running setup scripts, since some setup scripts can change the session state
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
@@ -430,7 +444,6 @@ func (d *DoltHarness) NewDatabases(names ...string) []sql.Database {
doltProvider, ok := pro.(*sqle.DoltDatabaseProvider)
require.True(d.t, ok)
d.provider = doltProvider
d.statsPro = statspro.NewProvider(doltProvider, statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
var err error
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), doltProvider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
@@ -502,7 +515,6 @@ func (d *DoltHarness) NewDatabaseProvider() sql.MutableDatabaseProvider {
func (d *DoltHarness) Close() {
d.closeProvider()
sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, int8(0))
}
func (d *DoltHarness) closeProvider() {
@@ -17,159 +17,156 @@ package enginetest
import (
"fmt"
"strings"
"testing"
gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/enginetest"
"github.com/dolthub/go-mysql-server/enginetest/queries"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
)
// fillerVarchar pushes the tree into level 3
var fillerVarchar = strings.Repeat("x", 500)
var DoltHistogramTests = []queries.ScriptTest{
{
Name: "mcv checking",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and column_name = 'y,z'",
Expected: []sql.Row{
{types.JSONDocument{Val: []interface{}{
float64(4),
}}},
},
},
{
Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'",
Expected: []sql.Row{
{types.JSONDocument{Val: []interface{}{
[]interface{}{float64(0), "a"},
}}},
},
},
{
Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'",
Expected: []sql.Row{
{2, "a"},
},
},
},
},
{
Name: "int pk",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y varchar(500));",
fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'",
Expected: []sql.Row{{32}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'",
Expected: []sql.Row{{float64(30000)}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'",
Expected: []sql.Row{{float64(0)}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'",
Expected: []sql.Row{{float64(30000)}},
},
{
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'",
Expected: []sql.Row{{int64(1)}},
},
},
},
{
Name: "nulls distinct across chunk boundary",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar),
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
Expected: []sql.Row{{2}},
},
{
// bucket boundary duplication
Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'",
Expected: []sql.Row{{202}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{float64(400)}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{float64(200)}},
},
{
// chunk border double count
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{float64(202)}},
},
{
// max bound count is an all nulls chunk
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{int64(183)}},
},
},
},
{
Name: "int index",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
Expected: []sql.Row{{152}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{float64(30000)}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{float64(10000)}},
},
{
// border NULL double count
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{float64(20036)}},
},
{
// max bound count is nulls chunk
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
Expected: []sql.Row{{int64(440)}},
},
},
},
//{
// Name: "mcv checking",
// SetUpScript: []string{
// "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
// "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
// "analyze table xy",
// },
// Assertions: []queries.ScriptTestAssertion{
// {
// Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and column_name = 'y,z'",
// Expected: []sql.Row{
// {types.JSONDocument{Val: []interface{}{
// float64(4),
// }}},
// },
// },
// {
// Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'",
// Expected: []sql.Row{
// {types.JSONDocument{Val: []interface{}{
// []interface{}{float64(0), "a"},
// }}},
// },
// },
// {
// Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'",
// Expected: []sql.Row{
// {2, "a"},
// },
// },
// },
//},
//{
// Name: "int pk",
// SetUpScript: []string{
// "CREATE table xy (x bigint primary key, y varchar(500));",
// fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
// fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
// fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
// "analyze table xy",
// },
// Assertions: []queries.ScriptTestAssertion{
// {
// Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'",
// Expected: []sql.Row{{32}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'",
// Expected: []sql.Row{{float64(30000)}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'",
// Expected: []sql.Row{{float64(0)}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'",
// Expected: []sql.Row{{float64(30000)}},
// },
// {
// Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'",
// Expected: []sql.Row{{int64(1)}},
// },
// },
//},
//{
// Name: "nulls distinct across chunk boundary",
// SetUpScript: []string{
// "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
// fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar),
// fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar),
// "analyze table xy",
// },
// Assertions: []queries.ScriptTestAssertion{
// {
// Query: "call dolt_stats_wait()",
// },
// {
// Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
// Expected: []sql.Row{{2}},
// },
// {
// // bucket boundary duplication
// Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'",
// Expected: []sql.Row{{202}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{float64(400)}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{float64(200)}},
// },
// {
// // chunk border double count
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{float64(202)}},
// },
// {
// // max bound count is an all nulls chunk
// Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{int64(183)}},
// },
// },
//},
//{
// Name: "int index",
// SetUpScript: []string{
// "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
// fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
// fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
// fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
// "analyze table xy",
// },
// Assertions: []queries.ScriptTestAssertion{
// {
// Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
// Expected: []sql.Row{{152}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{float64(30000)}},
// },
// {
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{float64(10000)}},
// },
// {
// // border NULL double count
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{float64(20036)}},
// },
// {
// // max bound count is nulls chunk
// Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
// Expected: []sql.Row{{int64(440)}},
// },
// },
//},
{
Name: "multiint index",
SetUpScript: []string{
@@ -177,9 +174,11 @@ var DoltHistogramTests = []queries.ScriptTest{
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_stats_wait()",
},
{
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'",
Expected: []sql.Row{{155}},
@@ -203,6 +202,41 @@ var DoltHistogramTests = []queries.ScriptTest{
},
},
},
{
Name: "multiint index small",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));",
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 2) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 3 union select x+1 from inputs where x < 4) select * from inputs) dt", fillerVarchar),
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 5 union select x+1 from inputs where x < 6) select * from inputs) dt", fillerVarchar),
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_stats_wait()",
},
{
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'",
Expected: []sql.Row{{1}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
Expected: []sql.Row{{float64(6)}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
Expected: []sql.Row{{float64(2)}},
},
{
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
Expected: []sql.Row{{float64(6)}},
},
{
// max bound count is nulls chunk
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
Expected: []sql.Row{{int64(1)}},
},
},
},
{
Name: "several int index",
SetUpScript: []string{
@@ -211,7 +245,10 @@ var DoltHistogramTests = []queries.ScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: " SELECT column_name from information_schema.column_statistics",
Query: "call dolt_stats_purge()",
},
{
Query: "SELECT column_name from information_schema.column_statistics",
Expected: []sql.Row{},
},
{
@@ -535,8 +572,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
{
Name: "incremental stats deletes auto",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
"analyze table xy",
@@ -550,10 +585,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
Query: "delete from xy where x > 500",
},
{
Query: "call dolt_stats_restart()",
},
{
Query: "select sleep(.1)",
Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics group by table_name, index_name",
@@ -565,8 +597,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
// https://github.com/dolthub/dolt/issues/8504
Name: "alter index column type",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
@@ -594,78 +624,9 @@ var DoltStatsStorageTests = []queries.ScriptTest{
},
},
},
{
Name: "differentiate table cases",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table XY (x bigint primary key, y varchar(16))",
"insert into XY values (0,'0'), (1,'1'), (2,'2')",
"analyze table XY",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "deleted table loads OK",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"CREATE table uv (u bigint primary key, v varchar(16))",
"insert into uv values (0,'0'), (1,'1'), (2,'2')",
"analyze table uv",
"drop table uv",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
},
},
{
Name: "differentiate branch names",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches ='main,feat'",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
"call dolt_checkout('-b', 'feat')",
"CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
"insert into xy values (3,'3',3)",
"analyze table xy",
"call dolt_checkout('main')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "2"}},
},
{
Query: "call dolt_checkout('feat')",
},
{
Query: "select table_name, upper_bound from dolt_statistics",
Expected: []sql.Row{{"xy", "3"}},
},
},
},
{
Name: "drop primary key",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
@@ -682,10 +643,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
Query: "insert into xy values ('3', '3')",
},
{
Query: "call dolt_stats_restart()",
},
{
Query: "select sleep(.2)",
Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics group by table_name, index_name",
@@ -699,9 +657,6 @@ var StatBranchTests = []queries.ScriptTest{
{
Name: "multi branch stats",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"set @@PERSIST.dolt_stats_branches = 'main,feat';",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
"call dolt_commit('-Am', 'xy')",
@@ -713,10 +668,7 @@ var StatBranchTests = []queries.ScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_stats_restart()",
},
{
Query: "select sleep(.1)",
Query: "call dolt_stats_wait()",
},
{
Query: "select table_name, index_name, row_count from dolt_statistics",
@@ -751,7 +703,7 @@ var StatBranchTests = []queries.ScriptTest{
Query: "call dolt_commit('-am', 'cm')",
},
{
Query: "select sleep(.1)",
Query: "call dolt_stats_wait()",
},
{
Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
@@ -769,30 +721,6 @@ var StatBranchTests = []queries.ScriptTest{
{"xy", "y", uint64(6)},
},
},
{
Query: "call dolt_checkout('feat')",
},
{
Query: "call dolt_stats_stop()",
},
{
Query: "select sleep(.1)",
},
{
Query: "call dolt_stats_drop()",
},
{
Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
Expected: []sql.Row{},
},
{
// we dropped 'feat', not 'main'
Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'",
Expected: []sql.Row{
{"xy", "primary", uint64(6)},
{"xy", "y", uint64(6)},
},
},
},
},
{
@@ -812,302 +740,3 @@ var StatBranchTests = []queries.ScriptTest{
},
},
}
var StatProcTests = []queries.ScriptTest{
{
Name: "deleting stats removes information_schema access point",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (0,0,0)",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "analyze table xy",
},
{
Query: "select count(*) from information_schema.column_statistics",
Expected: []sql.Row{{2}},
},
{
Query: "call dolt_stats_drop()",
},
{
Query: "select count(*) from information_schema.column_statistics",
Expected: []sql.Row{{0}},
},
},
},
{
Name: "restart empty stats panic",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{0}},
},
{
Query: "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0",
Expected: []sql.Row{{}},
},
{
Query: "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0",
Expected: []sql.Row{{}},
},
{
// don't panic
Query: "call dolt_stats_restart()",
},
{
Query: "select sleep(.1)",
},
{
Query: "insert into xy values (0,0,0)",
},
{
Query: "select sleep(.1)",
},
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{2}},
},
},
},
{
Name: "basic start, status, stop loop",
SetUpScript: []string{
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{0}},
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"no active stats thread"}},
},
// set refresh interval arbitrarily high to avoid updating when we restart
{
Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;",
Expected: []sql.Row{{}},
},
{
Query: "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0",
Expected: []sql.Row{{}},
},
{
Query: "call dolt_stats_restart()",
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"restarted thread: mydb"}},
},
{
Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
Expected: []sql.Row{{}},
},
// new restart picks up 0-interval, will start refreshing immediately
{
Query: "call dolt_stats_restart()",
},
{
Query: "select sleep(.1)",
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"refreshed mydb"}},
},
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{2}},
},
// kill refresh thread
{
Query: "call dolt_stats_stop()",
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"cancelled thread: mydb"}},
},
// insert without refresh thread will not update stats
{
Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')",
},
{
Query: "select sleep(.1)",
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"cancelled thread: mydb"}},
},
// manual analyze will update stats
{
Query: "analyze table xy",
Expected: []sql.Row{{"xy", "analyze", "status", "OK"}},
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"refreshed mydb"}},
},
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{2}},
},
// kill refresh thread and delete stats ref
{
Query: "call dolt_stats_drop()",
},
{
Query: "call dolt_stats_status()",
Expected: []sql.Row{{"dropped"}},
},
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{0}},
},
},
},
{
Name: "test purge",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');",
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
Expected: []sql.Row{{1}, {1}},
},
{
Query: "call dolt_stats_purge()",
},
{
Query: "select count(*) from dolt_statistics;",
Expected: []sql.Row{{0}},
},
},
},
{
Name: "test prune",
SetUpScript: []string{
"set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');",
"analyze table xy",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
Expected: []sql.Row{{1}, {1}},
},
{
Query: "call dolt_stats_prune()",
},
{
Query: "select count(*) from dolt_statistics;",
Expected: []sql.Row{{2}},
},
},
},
}
// TestProviderReloadScriptWithEngine runs the test script given with the engine provided.
func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) {
ctx := enginetest.NewContext(harness)
err := enginetest.CreateNewConnectionForServerEngine(ctx, e)
require.NoError(t, err, nil)
t.Run(script.Name, func(t *testing.T) {
for _, statement := range script.SetUpScript {
if sh, ok := harness.(enginetest.SkippingHarness); ok {
if sh.SkipQueryTest(statement) {
t.Skip()
}
}
ctx = ctx.WithQuery(statement)
enginetest.RunQueryWithContext(t, e, harness, ctx, statement)
}
assertions := script.Assertions
if len(assertions) == 0 {
assertions = []queries.ScriptTestAssertion{
{
Query: script.Query,
Expected: script.Expected,
ExpectedErr: script.ExpectedErr,
ExpectedIndexes: script.ExpectedIndexes,
},
}
}
{
// reload provider, get disk stats
eng, ok := e.(*gms.Engine)
if !ok {
t.Errorf("expected *gms.Engine but found: %T", e)
}
branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
brCopy := make([]string, len(branches))
copy(brCopy, branches)
err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
require.NoError(t, err)
for _, branch := range brCopy {
err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
require.NoError(t, err)
}
}
for _, assertion := range assertions {
t.Run(assertion.Query, func(t *testing.T) {
if assertion.NewSession {
th, ok := harness.(enginetest.TransactionHarness)
require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+
"but harness doesn't implement TransactionHarness")
ctx = th.NewSession()
}
if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) {
t.Skip()
}
if assertion.Skip {
t.Skip()
}
if assertion.ExpectedErr != nil {
enginetest.AssertErr(t, e, harness, assertion.Query, nil, assertion.ExpectedErr)
} else if assertion.ExpectedErrStr != "" {
enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, nil, assertion.ExpectedErrStr)
} else if assertion.ExpectedWarning != 0 {
enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query,
assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount,
assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck)
} else if assertion.SkipResultsCheck {
enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query)
} else if assertion.CheckIndexedAccess {
enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
} else {
var expected = assertion.Expected
if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine {
// TODO: remove this check in the future
expected = nil
}
enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings, nil)
}
})
}
})
}
func mustNewStatQual(s string) sql.StatQualifier {
qual, _ := sql.NewQualifierFromString(s)
return qual
}
@@ -292,7 +292,7 @@ type IndexScanBuilder interface {
// NewSecondaryIter returns an object used to perform secondary lookups
// for index joins.
NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen
NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error)
// Key returns the table root for caching purposes
Key() doltdb.DataCacheKey
@@ -395,7 +395,10 @@ func newNonCoveringLookupBuilder(s *durableIndexState, b *baseIndexImplBuilder)
"primary index passed, but only secondary indexes are supported")
}
primary := durable.ProllyMapFromIndex(s.Primary)
primary, err := durable.ProllyMapFromIndex(s.Primary)
if err != nil {
return nil, err
}
priKd, _ := primary.Descriptors()
tbBld := val.NewTupleBuilder(priKd)
pkMap := OrdinalMappingFromIndex(b.idx)
@@ -452,7 +455,7 @@ func (ib *baseIndexImplBuilder) NewRangeMapIter(_ context.Context, _ prolly.Rang
panic("cannot call NewMapIter on baseIndexImplBuilder")
}
func (ib *baseIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
func (ib *baseIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
panic("cannot call NewSecondaryIter on baseIndexImplBuilder")
}
@@ -628,11 +631,11 @@ func (ib *coveringIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part s
}
// NewSecondaryIter implements IndexScanBuilder
func (ib *coveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
func (ib *coveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
if strict {
return &covStrictSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx}
return &covStrictSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx}, nil
} else {
return &covLaxSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx, nullSafe: nullSafe}
return &covLaxSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx, nullSafe: nullSafe}, nil
}
}
@@ -735,11 +738,11 @@ func (ib *nonCoveringIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, par
}, nil
}
func (ib *nonCoveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
func (ib *nonCoveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
if strict {
return &nonCovStrictSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt)}
return &nonCovStrictSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt)}, nil
} else {
return &nonCovLaxSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt), nullSafe: nullSafe}
return &nonCovLaxSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt), nullSafe: nullSafe}, nil
}
}
@@ -766,12 +769,18 @@ func (ib *keylessIndexImplBuilder) OutputSchema() schema.Schema {
func (ib *keylessIndexImplBuilder) NewRangeMapIter(ctx context.Context, r prolly.Range, reverse bool) (prolly.MapIter, error) {
rows := ib.s.Primary
dsecondary := ib.s.Secondary
secondary := durable.ProllyMapFromIndex(dsecondary)
secondary, err := durable.ProllyMapFromIndex(dsecondary)
if err != nil {
return nil, err
}
indexIter, err := secondary.IterRange(ctx, r)
if err != nil {
return nil, err
}
clustered := durable.ProllyMapFromIndex(rows)
clustered, err := durable.ProllyMapFromIndex(rows)
if err != nil {
return nil, err
}
keyDesc := clustered.KeyDesc()
indexMap := OrdinalMappingFromIndex(ib.idx)
@@ -832,12 +841,18 @@ func (ib *keylessIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part sq
return newProllyKeylessIndexIter(ctx, ib.idx, prollyRange, doltgresRange, ib.sch, ib.projections, ib.s.Primary, ib.s.Secondary, reverse)
}
func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
pri := durable.ProllyMapFromIndex(ib.s.Primary)
func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
pri, err := durable.ProllyMapFromIndex(ib.s.Primary)
if err != nil {
return nil, err
}
pkDesc, _ := pri.Descriptors()
pkBld := val.NewTupleBuilder(pkDesc)
secondary := durable.ProllyMapFromIndex(ib.s.Secondary)
secondary, err := durable.ProllyMapFromIndex(ib.s.Secondary)
if err != nil {
return nil, err
}
return &keylessSecondaryLookupGen{
pri: pri,
@@ -846,7 +861,7 @@ func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSa
pkMap: OrdinalMappingFromIndex(ib.idx),
pkBld: pkBld,
prefixDesc: secondary.KeyDesc().PrefixDesc(cnt),
}
}, nil
}
type nomsIndexImplBuilder struct {
@@ -870,7 +885,7 @@ func (ib *nomsIndexImplBuilder) NewRangeMapIter(ctx context.Context, r prolly.Ra
panic("cannot call NewMapIter on *nomsIndexImplBuilder")
}
func (ib *nomsIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
func (ib *nomsIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
panic("cannot call NewSecondaryIter on *nomsIndexImplBuilder")
}
@@ -59,13 +59,20 @@ func newProllyIndexIter(
projections []uint64,
dprimary, dsecondary durable.Index,
) (prollyIndexIter, error) {
secondary := durable.ProllyMapFromIndex(dsecondary)
secondary, err := durable.ProllyMapFromIndex(dsecondary)
if err != nil {
return prollyIndexIter{}, err
}
indexIter, err := secondary.IterRange(ctx, rng)
if err != nil {
return prollyIndexIter{}, err
}
primary := durable.ProllyMapFromIndex(dprimary)
primary, err := durable.ProllyMapFromIndex(dprimary)
if err != nil {
return prollyIndexIter{}, err
}
kd, _ := primary.Descriptors()
pkBld := val.NewTupleBuilder(kd)
pkMap := OrdinalMappingFromIndex(idx)
@@ -183,7 +190,10 @@ func newProllyCoveringIndexIter(
projections []uint64,
indexdata durable.Index,
) (prollyCoveringIndexIter, error) {
secondary := durable.ProllyMapFromIndex(indexdata)
secondary, err := durable.ProllyMapFromIndex(indexdata)
if err != nil {
return prollyCoveringIndexIter{}, err
}
indexIter, err := secondary.IterRange(ctx, rng)
if err != nil {
return prollyCoveringIndexIter{}, err
@@ -293,9 +303,11 @@ type prollyKeylessIndexIter struct {
var _ sql.RowIter = prollyKeylessIndexIter{}
func newProllyKeylessIndexIter(ctx *sql.Context, idx DoltIndex, rng prolly.Range, doltgresRange *DoltgresRange, pkSch sql.PrimaryKeySchema, projections []uint64, rows, dsecondary durable.Index, reverse bool) (prollyKeylessIndexIter, error) {
secondary := durable.ProllyMapFromIndex(dsecondary)
secondary, err := durable.ProllyMapFromIndex(dsecondary)
if err != nil {
return prollyKeylessIndexIter{}, err
}
var indexIter prolly.MapIter
var err error
if doltgresRange == nil {
if reverse {
indexIter, err = secondary.IterRangeReverse(ctx, rng)
@@ -312,7 +324,10 @@ func newProllyKeylessIndexIter(ctx *sql.Context, idx DoltIndex, rng prolly.Range
}
}
clustered := durable.ProllyMapFromIndex(rows)
clustered, err := durable.ProllyMapFromIndex(rows)
if err != nil {
return prollyKeylessIndexIter{}, err
}
keyDesc, valDesc := clustered.Descriptors()
indexMap := OrdinalMappingFromIndex(idx)
keyBld := val.NewTupleBuilder(keyDesc)
+21 -6
View File
@@ -364,7 +364,10 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M
if rowData.Format() != types.Format_DOLT {
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, nil
}
priMap = durable.ProllyMapFromIndex(rowData)
priMap, err = durable.ProllyMapFromIndex(rowData)
if err != nil {
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
}
priSch = lb.OutputSchema()
@@ -384,7 +387,7 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
}
} else {
dstIter = lb.NewSecondaryIter(n.IsStrictLookup(), len(n.Expressions()), n.NullMask())
dstIter, _ = lb.NewSecondaryIter(n.IsStrictLookup(), len(n.Expressions()), n.NullMask())
}
case *plan.ResolvedTable:
@@ -414,7 +417,10 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M
if err != nil {
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
}
priMap = durable.ProllyMapFromIndex(priIndex)
priMap, err = durable.ProllyMapFromIndex(priIndex)
if err != nil {
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
}
secMap = priMap
srcIter, err = priMap.IterAll(ctx)
@@ -535,7 +541,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) {
if err != nil {
return ms, err
}
ms.idxMap = durable.ProllyMapFromIndex(secIdx)
ms.idxMap, err = durable.ProllyMapFromIndex(secIdx)
if err != nil {
return mergeState{}, err
}
table, err = doltTable.DoltTable(ctx)
if err != nil {
return ms, err
@@ -560,7 +569,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) {
if err != nil {
return ms, err
}
ms.idxMap = durable.ProllyMapFromIndex(priIndex)
ms.idxMap, err = durable.ProllyMapFromIndex(priIndex)
if err != nil {
return mergeState{}, err
}
secIterGen = index.NewKeylessIndexImplBuilder(priIndex, secIdx, idx)
} else {
secIterGen = index.NewSecondaryIterGen(ms.idxMap)
@@ -584,7 +596,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) {
return ms, err
}
priMap := durable.ProllyMapFromIndex(priIndex)
priMap, err := durable.ProllyMapFromIndex(priIndex)
if err != nil {
return ms, err
}
pkMap := index.OrdinalMappingFromIndex(idx)
priKd, _ := priMap.Descriptors()
pkBld := val.NewTupleBuilder(priKd)
@@ -33,7 +33,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/env"
dsql "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
@@ -144,11 +143,10 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error {
return err
}
statsPro := statspro.NewProvider(pro.(*dsql.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(env.NewGRPCDialProviderFromDoltEnv(dEnv)))
gcSafepointController := dsess.NewGCSafepointController()
config, _ := dEnv.Config.GetConfig(env.GlobalConfig)
sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statsPro, gcSafepointController)
sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.StatsNoop{}, gcSafepointController)
h.sess = sqlCtx.Session.(*dsess.DoltSession)
dbs := h.engine.Analyzer.Catalog.AllDatabases(sqlCtx)
+9 -2
View File
@@ -183,7 +183,11 @@ func ProllyRowIterFromPartition(
projections []uint64,
partition doltTablePartition,
) (sql.RowIter, error) {
rows := durable.ProllyMapFromIndex(partition.rowData)
rows, err := durable.ProllyMapFromIndex(partition.rowData)
if err != nil {
return nil, err
}
c, err := rows.Count()
if err != nil {
return nil, err
@@ -243,7 +247,10 @@ func DoltTablePartitionToRowIter(ctx *sql.Context, name string, table *doltdb.Ta
}
if types.IsFormat_DOLT(data.Format()) {
idx := durable.ProllyMapFromIndex(data)
idx, err := durable.ProllyMapFromIndex(data)
if err != nil {
return nil, nil, err
}
c, err := idx.Count()
if err != nil {
return nil, nil, err
@@ -1127,6 +1127,7 @@ func newTestEngine(ctx context.Context, dEnv *env.DoltEnv) (*gms.Engine, *sql.Co
IsServerLocked: false,
}), sqlCtx
}
func TestIndexOverwrite(t *testing.T) {
ctx := context.Background()
dEnv := dtestutils.CreateTestEnv()
@@ -1,489 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statsnoms
import (
"context"
"errors"
"fmt"
"path"
"strings"
"sync"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
"github.com/dolthub/dolt/go/libraries/utils/earl"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/datas"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/types"
)
func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory {
return &NomsStatsFactory{dialPro: dialPro}
}
type NomsStatsFactory struct {
dialPro dbfactory.GRPCDialProvider
}
var _ statspro.StatsFactory = NomsStatsFactory{}
func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) {
params := make(map[string]interface{})
params[dbfactory.GRPCDialProviderParam] = sf.dialPro
var urlPath string
u, err := earl.Parse(prov.DbFactoryUrl())
if u.Scheme == dbfactory.MemScheme {
urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir)
} else if u.Scheme == dbfactory.FileScheme {
urlPath = doltdb.LocalDirDoltDB
}
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
if err != nil {
return nil, err
}
var dEnv *env.DoltEnv
exists, isDir := statsFs.Exists("")
if !exists {
err := statsFs.MkDirs("")
if err != nil {
return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
}
dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test")
sess := dsess.DSessFromSess(ctx.Session)
err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch())
if err != nil {
return nil, err
}
} else if !isDir {
return nil, fmt.Errorf("file exists where the dolt stats directory should be")
} else {
dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "", "")
}
dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params)
deaf := dEnv.DbEaFactory(ctx)
tmpDir, err := dEnv.TempTableFilesDir()
if err != nil {
return nil, err
}
opts := editor.Options{
Deaf: deaf,
Tempdir: tmpDir,
}
statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
if err != nil {
return nil, err
}
return NewNomsStats(sourceDb, statsDb), nil
}
func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase {
return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb}
}
type dbStats map[sql.StatQualifier]*statspro.DoltStats
type NomsStatsDatabase struct {
mu *sync.Mutex
destDb dsess.SqlDatabase
sourceDb dsess.SqlDatabase
stats []dbStats
branches []string
tableHashes []map[string]hash.Hash
schemaHashes []map[string]hash.Hash
dirty []*prolly.MutableMap
}
var _ statspro.Database = (*NomsStatsDatabase)(nil)
func (n *NomsStatsDatabase) Close() error {
return n.destDb.DbData().Ddb.Close()
}
func (n *NomsStatsDatabase) Branches() []string {
return n.branches
}
func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)
dSess := dsess.DSessFromSess(ctx.Session)
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
if err != nil {
ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
return nil
}
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
if !ok {
return fmt.Errorf("branch/database not found: %s", branchQDbName)
}
if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
return err
} else if ok {
ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
if err := n.DeleteBranchStats(ctx, branch, true); err != nil {
return err
}
}
statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch)
if errors.Is(err, doltdb.ErrNoStatistics) {
return n.trackBranch(ctx, branch)
} else if errors.Is(err, datas.ErrNoBranchStats) {
return n.trackBranch(ctx, branch)
} else if err != nil {
return err
}
if cnt, err := statsMap.Count(); err != nil {
return err
} else if cnt == 0 {
return n.trackBranch(ctx, branch)
}
doltStats, err := loadStats(ctx, branchQDb, statsMap)
if err != nil {
return err
}
n.branches = append(n.branches, branch)
n.stats = append(n.stats, doltStats)
n.dirty = append(n.dirty, nil)
n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
return nil
}
func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
root, err := branchQDb.GetRoot(ctx)
if err != nil {
return false, err
}
tables, err := branchQDb.GetTableNames(ctx)
if err != nil {
return false, err
}
var keys []string
var schHashes []hash.Hash
for _, tableName := range tables {
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName})
if err != nil {
return false, err
}
if !ok {
return false, nil
}
curHash, err := table.GetSchemaHash(ctx)
if err != nil {
return false, err
}
keys = append(keys, n.schemaTupleKey(branch, tableName))
schHashes = append(schHashes, curHash)
}
ddb := n.destDb.DbData().Ddb
var schemaChange bool
for i, key := range keys {
curHash := schHashes[i]
if val, ok, err := ddb.GetTuple(ctx, key); err != nil {
return false, err
} else if ok {
oldHash := hash.Parse(string(val))
if !ok || !oldHash.Equal(curHash) {
schemaChange = true
break
}
}
}
if schemaChange {
for _, key := range keys {
ddb.DeleteTuple(ctx, key)
}
return true, nil
}
return false, nil
}
func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats {
for i, b := range n.branches {
if strings.EqualFold(b, branch) {
return n.stats[i]
}
}
return nil
}
func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) {
n.mu.Lock()
defer n.mu.Unlock()
stats := n.getBranchStats(branch)
ret, ok := stats[qual]
return ret, ok
}
func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier {
n.mu.Lock()
defer n.mu.Unlock()
stats := n.getBranchStats(branch)
var ret []sql.StatQualifier
for qual, _ := range stats {
ret = append(ret, qual)
}
return ret
}
func (n *NomsStatsDatabase) setStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
var statsMap *prolly.MutableMap
for i, b := range n.branches {
if strings.EqualFold(branch, b) {
n.stats[i][qual] = stats
if n.dirty[i] == nil {
if err := n.initMutable(ctx, i); err != nil {
return err
}
}
statsMap = n.dirty[i]
}
}
if statsMap == nil {
if err := n.trackBranch(ctx, branch); err != nil {
return err
}
statsMap = n.dirty[len(n.branches)-1]
n.stats[len(n.branches)-1][qual] = stats
}
return n.replaceStats(ctx, statsMap, stats)
}
func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
n.mu.Lock()
defer n.mu.Unlock()
return n.setStat(ctx, branch, qual, stats)
}
func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error {
n.branches = append(n.branches, branch)
n.stats = append(n.stats, make(dbStats))
n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
ns := n.destDb.DbData().Ddb.NodeStore()
kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors(ns)
newMap, err := prolly.NewMapFromTuples(ctx, ns, kd, vd)
if err != nil {
return err
}
n.dirty = append(n.dirty, newMap.Mutate())
return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf())
}
func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error {
statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i])
if err != nil {
return err
}
n.dirty[i] = statsMap.Mutate()
return nil
}
func (n *NomsStatsDatabase) DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) {
n.mu.Lock()
defer n.mu.Unlock()
for i, b := range n.branches {
if strings.EqualFold(b, branch) {
for _, qual := range quals {
ctx.GetLogger().Debugf("statistics refresh: deleting index statistics: %s/%s", branch, qual)
delete(n.stats[i], qual)
}
}
}
}
func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error {
n.mu.Lock()
defer n.mu.Unlock()
ctx.GetLogger().Debugf("statistics refresh: deleting branch statistics: %s", branch)
for i, b := range n.branches {
if strings.EqualFold(b, branch) {
n.branches = append(n.branches[:i], n.branches[i+1:]...)
n.dirty = append(n.dirty[:i], n.dirty[i+1:]...)
n.stats = append(n.stats[:i], n.stats[i+1:]...)
n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...)
n.schemaHashes = append(n.schemaHashes[:i], n.schemaHashes[i+1:]...)
}
}
if flush {
return n.destDb.DbData().Ddb.DropStatisics(ctx, branch)
}
return nil
}
func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error {
n.mu.Lock()
defer n.mu.Unlock()
var dbStat dbStats
for i, b := range n.branches {
if strings.EqualFold(b, branch) {
// naive merge the new with old
dbStat = n.stats[i]
}
}
if dbStat == nil {
if err := n.trackBranch(ctx, branch); err != nil {
return err
}
dbStat = n.stats[len(n.branches)-1]
}
if _, ok := dbStat[qual]; ok {
oldChunks := dbStat[qual].Hist
targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks)
if err != nil {
return err
}
newStat, err := dbStat[qual].WithHistogram(targetBuckets)
if err != nil {
return err
}
dbStat[qual] = newStat.(*statspro.DoltStats)
} else {
dbStat[qual] = statspro.NewDoltStats()
}
dbStat[qual].Chunks = targetHashes
dbStat[qual].UpdateActive()
// let |n.SetStats| update memory and disk
return n.setStat(ctx, branch, qual, dbStat[qual])
}
func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error {
n.mu.Lock()
defer n.mu.Unlock()
for i, b := range n.branches {
if strings.EqualFold(b, branch) {
if n.dirty[i] != nil {
flushedMap, err := n.dirty[i].Map(ctx)
if err != nil {
return err
}
n.dirty[i] = nil
if err := n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()); err != nil {
return err
}
return nil
}
}
}
return nil
}
func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash {
n.mu.Lock()
defer n.mu.Unlock()
for i, b := range n.branches {
if strings.EqualFold(branch, b) {
return n.tableHashes[i][tableName]
}
}
return hash.Hash{}
}
func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) {
n.mu.Lock()
defer n.mu.Unlock()
for i, b := range n.branches {
if strings.EqualFold(branch, b) {
n.tableHashes[i][tableName] = h
break
}
}
}
func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) {
n.mu.Lock()
defer n.mu.Unlock()
for i, b := range n.branches {
if strings.EqualFold(branch, b) {
return n.schemaHashes[i][tableName], nil
}
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
if err != nil {
return hash.Hash{}, err
}
h := hash.Parse(string(val))
n.schemaHashes[i][tableName] = h
return h, nil
} else if err != nil {
return hash.Hash{}, err
}
break
}
return hash.Hash{}, nil
}
func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
return n.sourceDb.Name() + "/" + branch + "/" + tableName
}
func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
n.mu.Lock()
defer n.mu.Unlock()
branchIdx := -1
for i, b := range n.branches {
if strings.EqualFold(branch, b) {
branchIdx = i
break
}
}
if branchIdx < 0 {
branchIdx = len(n.branches)
if err := n.trackBranch(ctx, branch); err != nil {
return err
}
}
n.schemaHashes[branchIdx][tableName] = h
key := n.schemaTupleKey(branch, tableName)
if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
return err
}
return n.destDb.DbData().Ddb.SetTuple(ctx, key, []byte(h.String()))
}
@@ -1,176 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statsnoms
import (
"fmt"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/planbuilder"
"gopkg.in/errgo.v2/errors"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)
var ErrIncompatibleVersion = errors.New("client stats version mismatch")
func NewStatsIter(ctx *sql.Context, schemaName string, m prolly.Map) (*statsIter, error) {
iter, err := m.IterAll(ctx)
if err != nil {
return nil, err
}
kd, vd := m.Descriptors()
keyBuilder := val.NewTupleBuilder(kd)
valueBuilder := val.NewTupleBuilder(vd)
ns := m.NodeStore()
return &statsIter{
iter: iter,
kb: keyBuilder,
vb: valueBuilder,
ns: ns,
schemaName: schemaName,
planb: planbuilder.New(ctx, nil, nil, nil),
}, nil
}
// statsIter reads histogram buckets into string-compatible types.
// Values that are SQL rows should be converted with statsIter.ParseRow.
// todo: make a JSON compatible container for sql.Row w/ types so that we
// can eagerly convert to sql.Row without sacrificing string printing.
type statsIter struct {
iter prolly.MapIter
kb, vb *val.TupleBuilder
ns tree.NodeStore
planb *planbuilder.Builder
currentQual string
schemaName string
currentTypes []sql.Type
}
var _ sql.RowIter = (*statsIter)(nil)
func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) {
k, v, err := s.iter.Next(ctx)
if err != nil {
return nil, err
}
// deserialize K, V
version, err := tree.GetField(ctx, s.vb.Desc, 0, v, s.ns)
if err != nil {
return nil, err
}
if version != schema.StatsVersion {
return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
}
var row sql.Row
for i := 0; i < s.kb.Desc.Count(); i++ {
f, err := tree.GetField(ctx, s.kb.Desc, i, k, s.ns)
if err != nil {
return nil, err
}
row = append(row, f)
}
for i := 0; i < s.vb.Desc.Count(); i++ {
f, err := tree.GetField(ctx, s.vb.Desc, i, v, s.ns)
if err != nil {
return nil, err
}
row = append(row, f)
}
dbName := row[schema.StatsDbTag].(string)
tableName := row[schema.StatsTableTag].(string)
indexName := row[schema.StatsIndexTag].(string)
position := row[schema.StatsPositionTag].(int64)
_ = row[schema.StatsVersionTag]
commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
rowCount := row[schema.StatsRowCountTag].(int64)
distinctCount := row[schema.StatsDistinctCountTag].(int64)
nullCount := row[schema.StatsNullCountTag].(int64)
columnsStr := row[schema.StatsColumnsTag].(string)
typesStr := row[schema.StatsTypesTag].(string)
upperBoundStr := row[schema.StatsUpperBoundTag].(string)
upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64)
createdAt := row[schema.StatsCreatedAtTag].(time.Time)
typs := strings.Split(typesStr, "\n")
for i, t := range typs {
typs[i] = strings.TrimSpace(t)
}
qual := sql.NewStatQualifier(dbName, s.schemaName, tableName, indexName)
if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) {
s.currentQual = curQual
s.currentTypes, err = parseTypeStrings(typs)
if err != nil {
return nil, err
}
}
mcvCountsStr := row[schema.StatsMcvCountsTag].(string)
numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
mcvs := make([]string, numMcvs)
for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
if v != nil {
mcvs[i] = v.(string)
}
}
return sql.Row{
dbName,
tableName,
indexName,
int(position),
version,
commit.String(),
uint64(rowCount),
uint64(distinctCount),
uint64(nullCount),
columnsStr,
typesStr,
upperBoundStr,
uint64(upperBoundCnt),
createdAt,
mcvs[0], mcvs[1], mcvs[2], mcvs[3],
mcvCountsStr,
}, nil
}
func (s *statsIter) ParseRow(rowStr string) (sql.Row, error) {
var row sql.Row
for i, v := range strings.Split(rowStr, ",") {
val, _, err := s.currentTypes[i].Convert(v)
if err != nil {
return nil, err
}
row = append(row, val)
}
return row, nil
}
func (s *statsIter) Close(context *sql.Context) error {
return nil
}
@@ -1,308 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statsnoms
import (
"errors"
"fmt"
"io"
"strconv"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/planbuilder"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)
func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) {
qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats)
schemaName := db.SchemaName()
iter, err := NewStatsIter(ctx, schemaName, m)
if err != nil {
return nil, err
}
currentStat := statspro.NewDoltStats()
invalidTables := make(map[string]bool)
for {
row, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return nil, err
}
// deserialize K, V
dbName := row[schema.StatsDbTag].(string)
tableName := row[schema.StatsTableTag].(string)
indexName := row[schema.StatsIndexTag].(string)
_ = row[schema.StatsVersionTag]
commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
rowCount := row[schema.StatsRowCountTag].(uint64)
distinctCount := row[schema.StatsDistinctCountTag].(uint64)
nullCount := row[schema.StatsNullCountTag].(uint64)
columns := strings.Split(row[schema.StatsColumnsTag].(string), ",")
typesStr := row[schema.StatsTypesTag].(string)
boundRowStr := row[schema.StatsUpperBoundTag].(string)
upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64)
createdAt := row[schema.StatsCreatedAtTag].(time.Time)
typs := strings.Split(typesStr, "\n")
for i, t := range typs {
typs[i] = strings.TrimSpace(t)
}
qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
if _, ok := invalidTables[tableName]; ok {
continue
}
if currentStat.Statistic.Qual.String() != qual.String() {
if !currentStat.Statistic.Qual.Empty() {
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}
currentStat = statspro.NewDoltStats()
tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
if ok {
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
} else if !ok {
ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
invalidTables[qual.Table()] = true
continue
} else if err != nil {
return nil, err
}
}
numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",")
mcvCnts := make([]uint64, numMcvs)
for i, v := range mcvCountsStr {
if v == "" {
continue
}
val, err := strconv.Atoi(v)
if err != nil {
return nil, err
}
mcvCnts[i] = uint64(val)
}
mcvs := make([]sql.Row, numMcvs)
for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
if v != nil && v != "" {
row, err := DecodeRow(ctx, m.NodeStore(), v.(string), currentStat.Tb)
if err != nil {
return nil, err
}
mcvs[i] = row
}
}
for i, v := range mcvCnts {
if v == 0 {
mcvs = mcvs[:i]
mcvCnts = mcvCnts[:i]
break
}
}
if currentStat.Statistic.Hist == nil {
currentStat.Statistic.Typs, err = parseTypeStrings(typs)
if err != nil {
return nil, err
}
currentStat.Statistic.Qual = qual
}
boundRow, err := DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb)
if err != nil {
return nil, err
}
bucket := statspro.DoltBucket{
Chunk: commit,
Created: createdAt,
Bucket: &stats.Bucket{
RowCnt: uint64(rowCount),
DistinctCnt: uint64(distinctCount),
NullCnt: uint64(nullCount),
McvVals: mcvs,
McvsCnt: mcvCnts,
BoundCnt: upperBoundCnt,
BoundVal: boundRow,
},
}
currentStat.Hist = append(currentStat.Hist, bucket)
currentStat.Statistic.RowCnt += uint64(rowCount)
currentStat.Statistic.DistinctCnt += uint64(distinctCount)
currentStat.Statistic.NullCnt += uint64(rowCount)
if currentStat.Statistic.Created.Before(createdAt) {
currentStat.Statistic.Created = createdAt
}
}
if !currentStat.Qualifier().Empty() {
currentStat.UpdateActive()
qualToStats[currentStat.Statistic.Qual] = currentStat
}
return qualToStats, nil
}
func parseTypeStrings(typs []string) ([]sql.Type, error) {
var ret []sql.Type
for _, typ := range typs {
ct, err := planbuilder.ParseColumnTypeString(typ)
if err != nil {
return nil, err
}
ret = append(ret, ct)
}
return ret, nil
}
func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
root, err := db.GetRoot(ctx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
iat, ok := sqlTable.(sql.IndexAddressable)
if !ok {
return nil, nil, nil, sql.ColSet{}, nil
}
indexes, err := iat.GetIndexes(ctx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
var sqlIdx sql.Index
for _, i := range indexes {
if strings.EqualFold(i.ID(), qual.Index()) {
sqlIdx = i
break
}
}
if sqlIdx == nil {
return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
}
fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
if !ok {
return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
}
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
var idx durable.Index
if qual.Index() == "primary" {
idx, err = table.GetRowData(ctx)
} else {
idx, err = table.GetIndexRowData(ctx, qual.Index())
}
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
prollyMap := durable.ProllyMapFromIndex(idx)
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
buffPool := prollyMap.NodeStore().Pool()
if cnt, err := prollyMap.Count(); err != nil {
return nil, nil, nil, sql.ColSet{}, err
} else if cnt == 0 {
return nil, keyBuilder, nil, sql.ColSet{}, nil
}
firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
keyBytes, _, err := firstIter.Next(ctx)
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
}
firstKey := keyBuilder.Build(buffPool)
firstRow := make(sql.Row, keyBuilder.Desc.Count())
for i := 0; i < keyBuilder.Desc.Count(); i++ {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, nil, nil, sql.ColSet{}, err
}
}
return firstRow, keyBuilder, fds, colset, nil
}
func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
if err != nil {
return nil, sql.ColSet{}, err
} else if !ok {
return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table())
}
iat, ok := tab.(sql.IndexAddressable)
if !ok {
return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table())
}
indexes, err := iat.GetIndexes(ctx)
if err != nil {
return nil, sql.ColSet{}, err
}
var idx sql.Index
for _, i := range indexes {
if strings.EqualFold(i.ID(), qual.Index()) {
idx = i
break
}
}
if idx == nil {
return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
}
return stats.IndexFds(qual.Table(), tab.Schema(), idx)
}
@@ -1,181 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statsnoms
import (
"context"
"errors"
"io"
"strings"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)
// About ~200 20 byte address fit in a ~4k chunk. Chunk sizes
// are approximate, but certainly shouldn't reach the square
// of the expected size.
const maxBucketFanout = 200 * 200
var mcvsTypes = []sql.Type{types.Int64, types.Int64, types.Int64}
func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
if err := deleteIndexRows(ctx, statsMap, dStats); err != nil {
return err
}
return putIndexRows(ctx, statsMap, dStats)
}
func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
if ctx.Err() != nil {
return ctx.Err()
}
sch := schema.StatsTableDoltSchema
kd, _ := sch.GetMapDescriptors(statsMap.NodeStore())
keyBuilder := val.NewTupleBuilder(kd)
qual := dStats.Qualifier()
pool := statsMap.NodeStore().Pool()
// delete previous entries for this index -> (db, table, index, pos)
keyBuilder.PutString(0, qual.Database)
keyBuilder.PutString(1, qual.Table())
keyBuilder.PutString(2, qual.Index())
keyBuilder.PutInt64(3, 0)
firstKey := keyBuilder.Build(pool)
keyBuilder.PutString(0, qual.Database)
keyBuilder.PutString(1, qual.Table())
keyBuilder.PutString(2, qual.Index())
keyBuilder.PutInt64(3, maxBucketFanout+1)
maxKey := keyBuilder.Build(pool)
// there is a limit on the number of buckets for a given index, iter
// will terminate before maxBucketFanout
iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey)
if err != nil {
return err
}
for {
k, _, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err
}
err = statsMap.Put(ctx, k, nil)
if err != nil {
return err
}
}
return nil
}
func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
if ctx.Err() != nil {
return ctx.Err()
}
sch := schema.StatsTableDoltSchema
kd, vd := sch.GetMapDescriptors(statsMap.NodeStore())
keyBuilder := val.NewTupleBuilder(kd)
valueBuilder := val.NewTupleBuilder(vd)
qual := dStats.Qualifier()
pool := statsMap.NodeStore().Pool()
// now add new buckets
typesB := strings.Builder{}
sep := ""
for _, t := range dStats.Statistic.Typs {
typesB.WriteString(sep + t.String())
sep = "\n"
}
typesStr := typesB.String()
var pos int64
for _, h := range dStats.Hist {
keyBuilder.PutString(0, qual.Database)
keyBuilder.PutString(1, qual.Tab)
keyBuilder.PutString(2, qual.Idx)
keyBuilder.PutInt64(3, pos)
valueBuilder.PutInt64(0, schema.StatsVersion)
valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String())
valueBuilder.PutInt64(2, int64(h.RowCount()))
valueBuilder.PutInt64(3, int64(h.DistinctCount()))
valueBuilder.PutInt64(4, int64(h.NullCount()))
valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
valueBuilder.PutString(6, typesStr)
boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb)
if err != nil {
return err
}
valueBuilder.PutString(7, string(boundRow))
valueBuilder.PutInt64(8, int64(h.BoundCount()))
valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
for i, r := range h.Mcvs() {
mcvRow, err := EncodeRow(ctx, statsMap.NodeStore(), r, dStats.Tb)
if err != nil {
return err
}
valueBuilder.PutString(10+i, string(mcvRow))
}
var mcvCntsRow sql.Row
for _, v := range h.McvCounts() {
mcvCntsRow = append(mcvCntsRow, int(v))
}
valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, mcvsTypes))
key := keyBuilder.Build(pool)
value := valueBuilder.Build(pool)
statsMap.Put(ctx, key, value)
pos++
}
return nil
}
func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
for i, v := range r {
if v == nil {
continue
}
if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
return nil, err
}
}
return tb.Build(ns.Pool()), nil
}
func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
tup := []byte(s)
r := make(sql.Row, tb.Desc.Count())
var err error
for i, _ := range r {
r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
if err != nil {
return nil, err
}
}
return r, nil
}
@@ -1,351 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"fmt"
"strings"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly/tree"
)
const (
boostrapRowLimit = 2e6
)
func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return err
}
return p.RefreshTableStatsWithBranch(ctx, table, db, branch)
}
func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error {
dSess := dsess.DSessFromSess(ctx.Session)
branches := p.getStatsBranches(ctx)
var rows uint64
for _, branch := range branches {
sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
if err != nil {
if sql.ErrDatabaseNotFound.Is(err) {
// default branch is not valid
continue
}
return err
}
tables, err := sqlDb.GetTableNames(ctx)
if err != nil {
return err
}
for _, table := range tables {
sqlTable, _, err := GetLatestTable(ctx, table, sqlDb)
if err != nil {
return err
}
if st, ok := sqlTable.(sql.StatisticsTable); ok {
cnt, ok, err := st.RowCount(ctx)
if ok && err == nil {
rows += cnt
}
}
if rows >= boostrapRowLimit {
return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE <table>\" or \"call dolt_stats_restart()\" to collect statistics", db)
}
if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil {
return err
}
}
}
return nil
}
func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error {
if !p.TryLockForUpdate(branch, db, table.Name()) {
return fmt.Errorf("already updating statistics")
}
defer p.UnlockTable(branch, db, table.Name())
dSess := dsess.DSessFromSess(ctx.Session)
sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
if err != nil {
return err
}
// lock only after accessing DatabaseProvider
tableName := strings.ToLower(table.Name())
dbName := strings.ToLower(db)
var schemaName string
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
}
iat, ok := table.(sql.IndexAddressableTable)
if !ok {
return nil
}
indexes, err := iat.GetIndexes(ctx)
if err != nil {
return err
}
// it's important to update WORKING session references every call
sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb)
if err != nil {
return err
}
statDb, ok := p.getStatDb(dbName)
if !ok {
// if the stats database does not exist, initialize one
fs, err := p.pro.FileSystemForDatabase(dbName)
if err != nil {
return err
}
sourceDb, ok := p.pro.BaseDatabase(ctx, dbName)
if !ok {
return sql.ErrDatabaseNotFound.New(dbName)
}
statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir)
if err != nil {
ctx.Warn(0, "%s", err.Error())
return nil
}
p.setStatDb(dbName, statDb)
}
schHash, err := dTab.GetSchemaHash(ctx)
if err != nil {
return err
}
if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() {
if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
return fmt.Errorf("set schema hash error: %w", err)
}
} else if oldSchHash != schHash {
ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
return err
}
stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, tableName)
if err != nil {
return err
}
for _, stat := range stats {
statDb.DeleteStats(ctx, branch, stat.Qualifier())
}
} else if err != nil {
return err
}
tablePrefix := fmt.Sprintf("%s.", tableName)
var idxMetas []indexMeta
for _, idx := range indexes {
cols := make([]string, len(idx.Expressions()))
for i, c := range idx.Expressions() {
cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
}
qual := sql.NewStatQualifier(db, schemaName, table.Name(), strings.ToLower(idx.ID()))
curStat, ok := statDb.GetStat(branch, qual)
if !ok {
curStat = NewDoltStats()
curStat.Statistic.Qual = qual
}
idxMeta, ok, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
if err != nil {
return err
}
if ok {
idxMetas = append(idxMetas, idxMeta)
}
}
newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
if err != nil {
return err
}
// merge new chunks with preexisting chunks
for _, idxMeta := range idxMetas {
stat, ok := newTableStats[idxMeta.qual]
if !ok {
continue
}
targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
if err != nil {
return err
}
if targetChunks == nil {
// empty table
continue
}
stat.SetChunks(idxMeta.allAddrs)
stat.Hist = targetChunks
stat.UpdateActive()
if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
return err
}
}
p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
return statDb.Flush(ctx, branch)
}
// BranchQualifiedDatabase returns a branch qualified database. If the database
// is already branch suffixed no duplication is applied.
func BranchQualifiedDatabase(db, branch string) string {
suffix := fmt.Sprintf("/%s", branch)
if !strings.HasSuffix(db, suffix) {
return fmt.Sprintf("%s%s", db, suffix)
}
return db
}
// GetLatestTable will get the WORKING root table for the current database/branch
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
var db sqle.Database
switch d := sqlDb.(type) {
case sqle.Database:
db = d
case sqle.ReadReplicaDatabase:
db = d.Database
default:
return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
}
sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
if err != nil {
return nil, nil, err
}
if !ok {
return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
}
var dTab *doltdb.Table
switch t := sqlTable.(type) {
case *sqle.AlterableDoltTable:
dTab, err = t.DoltTable.DoltTable(ctx)
case *sqle.WritableDoltTable:
dTab, err = t.DoltTable.DoltTable(ctx)
case *sqle.DoltTable:
dTab, err = t.DoltTable(ctx)
default:
err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
}
if err != nil {
return nil, nil, err
}
return sqlTable, dTab, nil
}
func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, bool, error) {
var idx durable.Index
var err error
if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
idx, err = doltTable.GetRowData(ctx)
} else {
idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
}
if err != nil {
return indexMeta{}, false, err
}
prollyMap, ok := durable.MaybeProllyMapFromIndex(idx)
if !ok {
return indexMeta{}, false, nil
}
if cnt, err := prollyMap.Count(); err != nil {
return indexMeta{}, false, err
} else if cnt == 0 {
return indexMeta{
qual: curStats.Statistic.Qual,
cols: cols,
}, true, nil
}
// get newest histogram target level hashes
levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
if err != nil {
return indexMeta{}, false, err
}
var addrs []hash.Hash
var keepChunks []sql.HistogramBucket
var missingAddrs float64
var missingChunks []tree.Node
var missingOffsets []updateOrdinal
var offset uint64
for _, n := range levelNodes {
// Compare the previous histogram chunks to the newest tree chunks.
// Partition the newest chunks into 1) preserved or 2) missing.
// Missing chunks will need to be scanned on a stats update, so
// track the (start, end) ordinal offsets to simplify the read iter.
treeCnt, err := n.TreeCount()
if err != nil {
return indexMeta{}, false, err
}
addrs = append(addrs, n.HashOf())
if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok {
missingChunks = append(missingChunks, n)
missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
missingAddrs++
} else {
keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
}
offset += uint64(treeCnt)
}
var dropChunks []sql.HistogramBucket
for _, h := range curStats.Chunks {
var match bool
for _, b := range keepChunks {
if DoltBucketChunk(b) == h {
match = true
break
}
}
if !match {
dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
}
}
return indexMeta{
qual: curStats.Statistic.Qual,
cols: cols,
newNodes: missingChunks,
updateOrdinals: missingOffsets,
keepChunks: keepChunks,
dropChunks: dropChunks,
allAddrs: addrs,
}, true, nil
}
@@ -1,296 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
types2 "github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
const asyncAutoRefreshStats = "async_auto_refresh_stats"
func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error {
_, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
_, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
interval64, _, _ := types2.Int64.Convert(interval)
intervalSec := time.Second * time.Duration(interval64.(int64))
thresholdf64 := threshold.(float64)
ctx, err := ctxFactory(context.Background())
if err != nil {
return err
}
branches := p.getStatsBranches(ctx)
return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches)
}
func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error {
// this is only called after initial statistics are finished loading
// launch a thread that periodically checks freshness
p.mu.Lock()
defer p.mu.Unlock()
dropDbCtx, dbStatsCancel := context.WithCancel(context.Background())
p.autoCtxCancelers[dbName] = dbStatsCancel
return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) {
ticker := time.NewTicker(checkInterval + time.Nanosecond)
for {
select {
case <-ctx.Done():
ticker.Stop()
return
case <-ticker.C:
select {
case <-dropDbCtx.Done():
ticker.Stop()
return
default:
}
err := func() error {
sqlCtx, err := ctxFactory(ctx)
if err != nil {
return err
}
defer sql.SessionEnd(sqlCtx.Session)
sql.SessionCommandBegin(sqlCtx.Session)
defer sql.SessionCommandEnd(sqlCtx.Session)
dSess := dsess.DSessFromSess(sqlCtx.Session)
ddb, ok := dSess.GetDoltDB(sqlCtx, dbName)
if !ok {
sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName)
return errors.New("database not found")
}
for _, branch := range branches {
if br, ok, err := ddb.HasBranch(sqlCtx, branch); ok {
sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String())
// update WORKING session references
sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch))
if err != nil {
sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
return err
}
if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil {
sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
return err
}
} else if err != nil {
sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error())
} else {
sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br)
}
}
return nil
}()
if err != nil {
return
}
}
}
})
}
func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error {
if !p.TryLockForUpdate(branch, dbName, "") {
return fmt.Errorf("database already being updated: %s/%s", branch, dbName)
}
defer p.UnlockTable(branch, dbName, "")
// Iterate all dbs, tables, indexes. Each db will collect
// []indexMeta above refresh threshold. We read and process those
// chunks' statistics. We merge updated chunks with precomputed
// chunks. The full set of statistics for each database lands
// 1) in the provider's most recent set of database statistics, and
// 2) on disk in the database's statistics ref'd prolly.Map.
statDb, ok := p.getStatDb(dbName)
if !ok {
return sql.ErrDatabaseNotFound.New(dbName)
}
var deletedStats []sql.StatQualifier
qualExists := make(map[sql.StatQualifier]bool)
tableExistsAndSkipped := make(map[string]bool)
tables, err := sqlDb.GetTableNames(ctx)
if err != nil {
return err
}
for _, table := range tables {
if !p.TryLockForUpdate(branch, dbName, table) {
ctx.GetLogger().Debugf("statistics refresh: table is already being updated: %s/%s.%s", branch, dbName, table)
return fmt.Errorf("table already being updated: %s", table)
}
defer p.UnlockTable(branch, dbName, table)
sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb)
if err != nil {
return err
}
tableHash, err := dTab.GetRowDataHash(ctx)
if err != nil {
return err
}
if statDb.GetTableHash(branch, table) == tableHash {
// no data changes since last check
tableExistsAndSkipped[table] = true
ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash)
continue
} else {
ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash)
}
schHash, err := dTab.GetSchemaHash(ctx)
if err != nil {
return err
}
var schemaName string
if schTab, ok := sqlTable.(sql.DatabaseSchemaTable); ok {
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
}
if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, table); oldSchHash.IsEmpty() {
if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
return err
}
} else if oldSchHash != schHash {
ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
return err
}
stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, table)
if err != nil {
return err
}
for _, stat := range stats {
statDb.DeleteStats(ctx, branch, stat.Qualifier())
}
} else if err != nil {
return err
}
iat, ok := sqlTable.(sql.IndexAddressableTable)
if !ok {
return fmt.Errorf("table does not support indexes %s", table)
}
indexes, err := iat.GetIndexes(ctx)
if err != nil {
return err
}
// collect indexes and ranges to be updated
var idxMetas []indexMeta
for _, index := range indexes {
qual := sql.NewStatQualifier(dbName, schemaName, table, strings.ToLower(index.ID()))
qualExists[qual] = true
curStat, ok := statDb.GetStat(branch, qual)
if !ok {
curStat = NewDoltStats()
curStat.Statistic.Qual = qual
cols := make([]string, len(index.Expressions()))
tablePrefix := fmt.Sprintf("%s.", table)
for i, c := range index.Expressions() {
cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
}
curStat.Statistic.Cols = cols
}
ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())
updateMeta, ok, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
if err != nil {
ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
continue
}
if !ok {
continue
}
curCnt := float64(len(curStat.Active))
updateCnt := float64(len(updateMeta.newNodes))
deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks))
ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt))
if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh {
if curCnt == 0 && updateCnt == 0 {
continue
}
ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual)
// mark index for updating
idxMetas = append(idxMetas, updateMeta)
// update latest hash if we haven't already
statDb.SetTableHash(branch, table, tableHash)
}
}
// get new buckets for index chunks to update
newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
if err != nil {
return err
}
// merge new chunks with preexisting chunks
for _, updateMeta := range idxMetas {
stat := newTableStats[updateMeta.qual]
if stat != nil {
var err error
if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok {
err = statDb.SetStat(ctx, branch, updateMeta.qual, stat)
} else {
err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist)
}
if err != nil {
return err
}
p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
}
}
}
for _, q := range statDb.ListStatQuals(branch) {
// table or index delete leaves hole in stats
// this is separate from threshold check
if !tableExistsAndSkipped[q.Table()] && !qualExists[q] {
// only delete stats we've verified are deleted
deletedStats = append(deletedStats, q)
}
}
statDb.DeleteStats(ctx, branch, deletedStats...)
if err := statDb.Flush(ctx, branch); err != nil {
return err
}
return nil
}
@@ -1,4 +1,4 @@
// Copyright 2023 Dolthub, Inc.
// Copyright 2023-2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,19 +17,11 @@ package statspro
import (
"container/heap"
"context"
"errors"
"fmt"
"io"
"sort"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
@@ -40,156 +32,7 @@ const (
mcvCnt = 3
)
// createNewStatsBuckets builds histograms for a list of index statistic metadata.
// We only read chunk ranges indicated by |indexMeta.updateOrdinals|. If
// the returned buckets are a subset of the index the caller is responsible
// for reconciling the difference.
func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Table, indexes []sql.Index, idxMetas []indexMeta) (map[sql.StatQualifier]*DoltStats, error) {
nameToIdx := make(map[string]sql.Index)
for _, idx := range indexes {
nameToIdx[strings.ToLower(idx.ID())] = idx
}
ret := make(map[sql.StatQualifier]*DoltStats)
for _, meta := range idxMetas {
sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())]
if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() {
continue
}
var idx durable.Index
var err error
if strings.EqualFold(meta.qual.Index(), "PRIMARY") {
idx, err = dTab.GetRowData(ctx)
} else {
idx, err = dTab.GetIndexRowData(ctx, meta.qual.Index())
}
if err != nil {
return nil, err
}
prollyMap := durable.ProllyMapFromIndex(idx)
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx)
if err != nil {
return nil, err
}
var types []sql.Type
for _, cet := range nameToIdx[strings.ToLower(meta.qual.Index())].ColumnExpressionTypes() {
types = append(types, cet.Type)
}
if cnt, err := prollyMap.Count(); err != nil {
return nil, err
} else if cnt == 0 {
// table is empty
ret[meta.qual] = NewDoltStats()
ret[meta.qual].Statistic.Created = time.Now()
ret[meta.qual].Statistic.Cols = meta.cols
ret[meta.qual].Statistic.Typs = types
ret[meta.qual].Statistic.Qual = meta.qual
ret[meta.qual].Statistic.Fds = fds
ret[meta.qual].Statistic.Colset = colSet
ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
continue
}
firstRow, err := firstRowForIndex(ctx, prollyMap, keyBuilder, len(meta.cols))
if err != nil {
return nil, err
}
updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc())
ret[meta.qual] = NewDoltStats()
ret[meta.qual].Chunks = meta.allAddrs
ret[meta.qual].Statistic.Created = time.Now()
ret[meta.qual].Statistic.Cols = meta.cols
ret[meta.qual].Statistic.Typs = types
ret[meta.qual].Statistic.Qual = meta.qual
ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
var start, stop uint64
// read leaf rows for each bucket
for i, chunk := range meta.newNodes {
// each node is a bucket
updater.newBucket()
// we read exclusive range [node first key, next node first key)
start, stop = meta.updateOrdinals[i].start, meta.updateOrdinals[i].stop
iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
if err != nil {
return nil, err
}
for {
// stats key will be a prefix of the index key
keyBytes, _, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return nil, err
}
// build full key
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
}
updater.add(ctx, keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
keyBuilder.Recycle()
}
// finalize the aggregation
bucket, err := updater.finalize(ctx, prollyMap.NodeStore())
if err != nil {
return nil, err
}
bucket.Chunk = chunk.HashOf()
ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket)
}
ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct)
ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount)
ret[updater.qual].Statistic.LowerBnd = firstRow
ret[updater.qual].Statistic.Fds = fds
ret[updater.qual].Statistic.Colset = colSet
ret[updater.qual].UpdateActive()
}
return ret, nil
}
// MergeNewChunks combines a set of old and new chunks to create
// the desired target histogram. Undefined behavior if a |targetHash|
// does not exist in either |oldChunks| or |newChunks|.
func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) {
hashToPos := make(map[hash.Hash]int, len(inputHashes))
for i, h := range inputHashes {
hashToPos[h] = i
}
var cnt int
targetBuckets := make([]sql.HistogramBucket, len(inputHashes))
for _, c := range oldChunks {
if idx, ok := hashToPos[DoltBucketChunk(c)]; ok {
cnt++
targetBuckets[idx] = c
}
}
for _, c := range newChunks {
if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil {
cnt++
targetBuckets[idx] = c
}
}
if cnt != len(inputHashes) {
return nil, fmt.Errorf("encountered invalid statistic chunks")
}
return targetBuckets, nil
}
func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder, prefixLen int) (sql.Row, error) {
func firstRowForIndex(ctx *sql.Context, idxLen int, prollyMap prolly.Map, keyBuilder *val.TupleBuilder) (sql.Row, error) {
if cnt, err := prollyMap.Count(); err != nil {
return nil, err
} else if cnt == 0 {
@@ -211,9 +54,9 @@ func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.Tu
keyBuilder.PutRaw(i, keyBytes.GetField(i))
}
firstKey := keyBuilder.BuildPrefixNoRecycle(buffPool, prefixLen)
firstRow := make(sql.Row, prefixLen)
for i := 0; i < prefixLen; i++ {
firstKey := keyBuilder.Build(buffPool)
firstRow := make(sql.Row, idxLen)
for i := range firstRow {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, err
@@ -269,7 +112,7 @@ func (u *bucketBuilder) newBucket() {
// finalize converts the current aggregation stats into a histogram bucket,
// which includes deserializing most common value tuples into sql.Rows.
func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBucket, error) {
func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (*stats.Bucket, error) {
// update MCV in case we've ended on a run of many identical keys
u.updateMcv()
@@ -279,27 +122,25 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu
// convert the MCV tuples into SQL rows (most efficient to only do this once)
mcvRows, err := u.mcvs.Values(ctx, u.tupleDesc, ns, u.prefixLen)
if err != nil {
return DoltBucket{}, err
return nil, err
}
upperBound := make(sql.Row, u.prefixLen)
if u.currentKey != nil {
for i := 0; i < u.prefixLen; i++ {
upperBound[i], err = tree.GetField(ctx, u.tupleDesc, i, u.currentKey, ns)
if err != nil {
return DoltBucket{}, err
return nil, err
}
}
}
return DoltBucket{
Bucket: &stats.Bucket{
RowCnt: uint64(u.count),
DistinctCnt: uint64(u.distinct),
BoundCnt: uint64(u.currentCnt),
McvVals: mcvRows,
McvsCnt: u.mcvs.Counts(),
BoundVal: upperBound,
NullCnt: uint64(u.nulls),
},
return &stats.Bucket{
RowCnt: uint64(u.count),
DistinctCnt: uint64(u.distinct),
BoundCnt: uint64(u.currentCnt),
McvVals: mcvRows,
McvsCnt: u.mcvs.Counts(),
BoundVal: upperBound,
NullCnt: uint64(u.nulls),
}, nil
}
@@ -1,4 +1,4 @@
// Copyright 2023 Dolthub, Inc.
// Copyright 2023-2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -61,27 +61,27 @@ func TestBucketBuilder(t *testing.T) {
name string
keys []sql.Row
keyDesc val.TupleDesc
bucket DoltBucket
bucket *stats.Bucket
}{
{
name: "ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 5,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
}},
},
},
{
// technically nulls should be at beginning
name: "ints with middle nulls",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 16,
DistinctCnt: 6,
NullCnt: 3,
@@ -89,13 +89,13 @@ func TestBucketBuilder(t *testing.T) {
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
}},
},
},
{
name: "ints with beginning nulls",
keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 6,
NullCnt: 2,
@@ -103,86 +103,86 @@ func TestBucketBuilder(t *testing.T) {
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
}},
},
},
{
name: "more ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 22,
DistinctCnt: 7,
BoundCnt: 1,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(7)},
}},
},
},
{
name: "2-ints",
keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 11,
McvVals: []sql.Row{{int64(4), int64(1)}},
McvsCnt: []uint64{3},
BoundVal: sql.Row{int64(5), int64(2)},
BoundCnt: 1,
}},
},
},
{
name: "2-ints with nulls",
keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 5,
DistinctCnt: 5,
NullCnt: 3,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(2), int64(2)},
BoundCnt: 1},
BoundCnt: 1,
},
},
{
name: "varchars",
keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 9,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{"i"},
BoundCnt: 2,
}},
},
},
{
name: "varchar-ints",
keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 12,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{"i", int64(1)},
BoundCnt: 2,
}},
},
},
{
name: "mcvs",
keys: []sql.Row{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {7}, {7}, {7}, {8}, {9}, {10}, {10}, {10}, {11}, {12}, {13}, {14}, {15}, {20}, {21}, {22}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
bucket: DoltBucket{Bucket: &stats.Bucket{
bucket: &stats.Bucket{
RowCnt: 23,
DistinctCnt: 18,
McvVals: []sql.Row{{int64(10)}, {int64(7)}},
McvsCnt: []uint64{3, 4},
BoundVal: sql.Row{int64(22)},
BoundCnt: 1,
}},
},
},
}
@@ -1,161 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"fmt"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
types2 "github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
)
var helpMsg = "call dolt_stats_purge() to reset statistics"
func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Context) (*sql.Context, error), bThreads *sql.BackgroundThreads, dbs []dsess.SqlDatabase) error {
p.SetStarter(NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
if _, disabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly); disabled == int8(1) {
return nil
}
loadCtx, err := ctxFactory(ctx)
if err != nil {
return err
}
defer sql.SessionEnd(loadCtx.Session)
sql.SessionCommandBegin(loadCtx.Session)
defer sql.SessionCommandEnd(loadCtx.Session)
branches := p.getStatsBranches(loadCtx)
var autoEnabled bool
var startupEnabled bool
var intervalSec time.Duration
var thresholdf64 float64
if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) {
autoEnabled = true
_, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
_, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
interval64, _, _ := types2.Int64.Convert(interval)
intervalSec = time.Second * time.Duration(interval64.(int64))
thresholdf64 = threshold.(float64)
p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
p.pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{NewStatsDropDatabaseHook(p)}, p.pro.DropDatabaseHooks...)
} else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) {
startupEnabled = true
}
eg, ctx := loadCtx.NewErrgroup()
for _, db := range dbs {
// copy closure variables
db := db
eg.Go(func() (err error) {
defer func() {
if r := recover(); r != nil {
if str, ok := r.(fmt.Stringer); ok {
err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String())
} else {
err = fmt.Errorf("%w: %v", ErrFailedToLoad, r)
}
return
}
}()
fs, err := p.pro.FileSystemForDatabase(db.Name())
if err != nil {
return err
}
if p.Load(loadCtx, fs, db, branches); err != nil {
return err
}
if autoEnabled {
return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches)
} else if startupEnabled {
if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil {
return err
}
}
return nil
})
}
return eg.Wait()
}
// getStatsBranches returns the set of branches whose statistics are tracked.
// The order of precedence is (1) global variable, (2) session current branch,
// (3) engine default branch.
func (p *Provider) getStatsBranches(ctx *sql.Context) []string {
dSess := dsess.DSessFromSess(ctx.Session)
var branches []string
if _, bs, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranches); bs == "" {
defaultBranch, _ := dSess.GetBranch()
if defaultBranch != "" {
branches = append(branches, defaultBranch)
}
} else {
for _, branch := range strings.Split(bs.(string), ",") {
branches = append(branches, strings.TrimSpace(branch))
}
}
if branches == nil {
branches = append(branches, p.pro.DefaultBranch())
}
return branches
}
func (p *Provider) LoadStats(ctx *sql.Context, db, branch string) error {
if statDb, ok := p.getStatDb(db); ok {
return statDb.LoadBranchStats(ctx, branch)
}
return nil
}
// Load scans the statistics tables, populating the |stats| attribute.
// Statistics are not available for reading until we've finished loading.
func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDatabase, branches []string) {
// |statPath| is either file://./stat or mem://stat
statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir)
if err != nil {
ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
return
}
for _, branch := range branches {
if err = statsDb.LoadBranchStats(ctx, branch); err != nil {
// if branch name is invalid, continue loading rest
// TODO: differentiate bad branch name from other errors
ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
continue
}
if err := statsDb.Flush(ctx, branch); err != nil {
ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
continue
}
}
p.setStatDb(strings.ToLower(db.Name()), statsDb)
return
}
@@ -0,0 +1,630 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"path"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/sirupsen/logrus"
"github.com/dolthub/dolt/go/cmd/dolt/doltversion"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro/jobqueue"
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
"github.com/dolthub/dolt/go/libraries/utils/earl"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/types"
"github.com/dolthub/dolt/go/store/val"
)
var _ sql.StatsProvider = (*StatsController)(nil)
type ctxFactory func(ctx context.Context) (*sql.Context, error)
type tableIndexesKey struct {
db string
branch string
table string
schema string
}
func (k tableIndexesKey) String() string {
if k.table != "" {
return k.schema + "/" + k.db + "/" + k.branch + "/" + k.table
}
return k.db + "/" + k.branch + "/" + k.table
}
type StatsController struct {
logger *logrus.Logger
pro *sqle.DoltDatabaseProvider
bgThreads *sql.BackgroundThreads
statsBackingDb filesys.Filesys
hdpEnv *env.DoltEnv
dbFs map[string]filesys.Filesys
// ctxGen lets us fetch the most recent working root
ctxGen ctxFactory
sq *jobqueue.SerialQueue
activeCtxCancel context.CancelFunc
listeners []listener
JobInterval time.Duration
gcInterval time.Duration
memOnly bool
enableGc bool
doGc bool
Debug bool
closed chan struct{}
// kv is a content-addressed cache of histogram objects:
// buckets, first bounds, and schema-specific statistic
// templates.
kv StatsKv
// Stats tracks table statistics accessible to sessions.
Stats *rootStats
// mu protects all shared object access
mu sync.Mutex
// genCnt is used to atomically swap Stats, same behavior
// as last-writer wins
genCnt atomic.Uint64
gcCnt int
}
type rootStats struct {
hashes map[tableIndexesKey]hash.Hash
stats map[tableIndexesKey][]*stats.Statistic
DbCnt int `json:"dbCnt"`
BucketWrites int `json:"bucketWrites"`
TablesProcessed int `json:"tablesProcessed"`
TablesSkipped int `json:"tablesSkipped"`
}
func newRootStats() *rootStats {
return &rootStats{
hashes: make(map[tableIndexesKey]hash.Hash),
stats: make(map[tableIndexesKey][]*stats.Statistic),
}
}
func (rs *rootStats) String() string {
str, _ := json.Marshal(rs)
return string(str)
}
func NewStatsController(logger *logrus.Logger, dEnv *env.DoltEnv) *StatsController {
sq := jobqueue.NewSerialQueue().WithErrorCb(func(err error) {
logger.Error(err)
})
return &StatsController{
mu: sync.Mutex{},
logger: logger,
JobInterval: 500 * time.Millisecond,
gcInterval: 24 * time.Hour,
sq: sq,
Stats: newRootStats(),
dbFs: make(map[string]filesys.Filesys),
closed: make(chan struct{}),
kv: NewMemStats(),
hdpEnv: dEnv,
genCnt: atomic.Uint64{},
}
}
func (sc *StatsController) SetBackgroundThreads(bgThreads *sql.BackgroundThreads) {
sc.bgThreads = bgThreads
}
func (sc *StatsController) SetMemOnly(v bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.memOnly = v
}
func (sc *StatsController) SetEnableGc(v bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.enableGc = v
}
func (sc *StatsController) setDoGc(force bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.enableGc || force {
sc.doGc = true
}
}
func (sc *StatsController) gcIsSet() bool {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.doGc
}
// SetTimers can only be called after Init
func (sc *StatsController) SetTimers(job, gc int64) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.sq.NewRateLimit(time.Duration(max(1, job)))
sc.gcInterval = time.Duration(gc)
}
func (sc *StatsController) AddFs(ctx *sql.Context, db dsess.SqlDatabase, fs filesys.Filesys, rotateOk bool) error {
sc.mu.Lock()
defer sc.mu.Unlock()
firstDb := len(sc.dbFs) == 0
sc.dbFs[db.AliasedName()] = fs
if rotateOk && firstDb {
return sc.lockedRotateStorage(ctx)
}
return nil
}
func (sc *StatsController) Info(ctx context.Context) (dprocedures.StatsInfo, error) {
sc.mu.Lock()
defer sc.mu.Unlock()
// don't use protected access / deadlock
cachedBucketCnt := sc.kv.Len()
storageCnt, err := sc.kv.Flush(ctx)
if err != nil {
return dprocedures.StatsInfo{}, err
}
var cachedBoundCnt int
var cachedTemplateCnt int
var backing string
switch kv := sc.kv.(type) {
case *memStats:
cachedBoundCnt = len(kv.bounds)
cachedTemplateCnt = len(kv.templates)
backing = "memory"
case *prollyStats:
cachedBoundCnt = len(kv.mem.bounds)
cachedTemplateCnt = len(kv.mem.templates)
backing, _ = sc.statsBackingDb.Abs("")
}
return dprocedures.StatsInfo{
DbCnt: sc.Stats.DbCnt,
Active: sc.activeCtxCancel != nil,
CachedBucketCnt: cachedBucketCnt,
StorageBucketCnt: storageCnt,
CachedBoundCnt: cachedBoundCnt,
CachedTemplateCnt: cachedTemplateCnt,
StatCnt: len(sc.Stats.stats),
GenCnt: int(sc.genCnt.Load()),
GcCnt: sc.gcCnt,
Backing: filepath.Base(backing),
}, nil
}
func (sc *StatsController) descError(d string, err error) {
if errors.Is(err, context.Canceled) {
return
}
if sc.Debug {
log.Println("stats error: ", err.Error())
}
b := strings.Builder{}
b.WriteString("stats error;")
if d != "" {
b.WriteString("; " + d)
}
if err != nil {
b.WriteString("; " + err.Error())
}
sc.logger.Debug(b.String())
}
func (sc *StatsController) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
key, err := sc.statsKey(ctx, db, table.Name())
if err != nil {
return nil, err
}
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.Stats == nil {
return nil, nil
}
st := sc.Stats.stats[key]
var ret []sql.Statistic
for _, s := range st {
ret = append(ret, s)
}
return ret, nil
}
func (sc *StatsController) AnalyzeTable(ctx *sql.Context, table sql.Table, dbName string) (err error) {
dSess := dsess.DSessFromSess(ctx.Session)
var branch string
if strings.Contains(dbName, "/") {
parts := strings.Split(dbName, "/")
if len(parts) == 2 {
dbName = parts[0]
branch = parts[1]
}
}
if branch == "" {
var err error
branch, err = dSess.GetBranch()
if err != nil {
return err
}
if branch == "" {
branch = env.DefaultInitBranch
}
}
db, err := sc.pro.Database(ctx, dbName)
sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), branch, branch+"/"+dbName)
if err != nil {
return err
}
newStats := newRootStats()
err = sc.updateTable(ctx, newStats, table.Name(), sqlDb, nil)
if err != nil {
return err
}
sc.mu.Lock()
for k, v := range newStats.stats {
sc.Stats.stats[k] = v
sc.Stats.hashes[k] = newStats.hashes[k]
}
sc.mu.Unlock()
return err
}
func (sc *StatsController) SetStats(ctx *sql.Context, s sql.Statistic) error {
sc.mu.Lock()
defer sc.mu.Unlock()
ss, ok := s.(*stats.Statistic)
if !ok {
return fmt.Errorf("expected *stats.Statistics, found %T", s)
}
key, err := sc.statsKey(ctx, ss.Qualifier().Db(), ss.Qualifier().Table())
if err != nil {
return err
}
// not efficient, but this is only used for testing
var newStats []*stats.Statistic
for _, ss := range sc.Stats.stats[key] {
if !strings.EqualFold(ss.Qualifier().Index(), s.Qualifier().Index()) {
newStats = append(newStats, ss)
}
}
newStats = append(newStats, ss)
sc.Stats.stats[key] = newStats
return nil
}
func (sc *StatsController) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
key, err := sc.statsKey(ctx, qual.Database, qual.Table())
if err != nil {
return nil, false
}
for _, s := range sc.Stats.stats[key] {
if strings.EqualFold(s.Qualifier().Index(), qual.Index()) {
return s, true
}
}
return nil, false
}
func (sc *StatsController) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) {
key := tableIndexesKey{
db: strings.ToLower(db),
branch: strings.ToLower(branch),
table: strings.ToLower(table),
schema: strings.ToLower(schema),
}
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.Stats == nil {
return nil, nil
}
return sc.Stats.stats[key], nil
}
func (sc *StatsController) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
key, err := sc.statsKey(ctx, qual.Database, qual.Table())
if err != nil {
return err
}
sc.mu.Lock()
defer sc.mu.Unlock()
delete(sc.Stats.stats, key)
return nil
}
func (sc *StatsController) DropDbStats(ctx *sql.Context, dbName string, flush bool) error {
sc.mu.Lock()
defer sc.mu.Unlock()
dbFs := sc.dbFs[dbName]
delete(sc.dbFs, dbName)
if sc.statsBackingDb == dbFs {
// don't wait to see if the thread context is invalidated
func() {
sc.mu.Unlock()
sc.Restart()
defer sc.mu.Lock()
}()
if err := sc.lockedRotateStorage(ctx); err != nil {
return err
}
}
var deleteKeys []tableIndexesKey
for k, _ := range sc.Stats.stats {
if strings.EqualFold(dbName, k.db) {
deleteKeys = append(deleteKeys, k)
}
}
for _, k := range deleteKeys {
delete(sc.Stats.stats, k)
}
return nil
}
func (sc *StatsController) statsKey(ctx *sql.Context, dbName, table string) (tableIndexesKey, error) {
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return tableIndexesKey{}, err
}
key := tableIndexesKey{
db: strings.ToLower(dbName),
branch: strings.ToLower(branch),
table: strings.ToLower(table),
}
return key, nil
}
func (sc *StatsController) RowCount(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
key, err := sc.statsKey(ctx, dbName, table.Name())
if err != nil {
return 0, err
}
sc.mu.Lock()
defer sc.mu.Unlock()
for _, s := range sc.Stats.stats[key] {
if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
return s.RowCnt, nil
}
}
return 0, nil
}
func (sc *StatsController) DataLength(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
key, err := sc.statsKey(ctx, dbName, table.Name())
if err != nil {
return 0, err
}
sc.mu.Lock()
defer sc.mu.Unlock()
for _, s := range sc.Stats.stats[key] {
if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
return s.RowCnt, nil
}
}
return 0, nil
}
func (sc *StatsController) Purge(ctx *sql.Context) error {
genStart := sc.genCnt.Load()
newKv := NewMemStats()
newKv.gcGen = genStart
newStats := newRootStats()
if ok, err := sc.trySwapStats(ctx, genStart, newStats, newKv); !ok {
return fmt.Errorf("failed to purge stats")
} else if err != nil {
return err
}
return nil
}
func (sc *StatsController) rotateStorage(ctx context.Context) error {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.lockedRotateStorage(ctx)
}
func (sc *StatsController) lockedRotateStorage(ctx context.Context) error {
if sc.memOnly {
return nil
}
if sc.statsBackingDb != nil {
if err := sc.rm(sc.statsBackingDb); err != nil {
return err
}
}
var mem *memStats
switch kv := sc.kv.(type) {
case *prollyStats:
mem = kv.mem
case *memStats:
mem = kv
default:
mem = NewMemStats()
}
if len(sc.dbFs) == 0 {
sc.kv = mem
sc.statsBackingDb = nil
return nil
}
var newStorageTarget filesys.Filesys
for _, dbFs := range sc.dbFs {
newStorageTarget = dbFs
if newStorageTarget == sc.statsBackingDb {
// prefer continuity when possible
break
}
}
if err := sc.rm(newStorageTarget); err != nil {
return err
}
sqlCtx, err := sc.ctxGen(ctx)
if err != nil {
return err
}
defer sql.SessionEnd(sqlCtx.Session)
sql.SessionCommandBegin(sqlCtx.Session)
defer sql.SessionCommandEnd(sqlCtx.Session)
newKv, err := sc.initStorage(sqlCtx, newStorageTarget)
if err != nil {
return err
}
newKv.mem = mem
sc.kv = newKv
sc.statsBackingDb = newStorageTarget
return nil
}
func (sc *StatsController) rm(fs filesys.Filesys) error {
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
if err != nil {
return err
}
if ok, _ := statsFs.Exists(""); ok {
if err := statsFs.Delete("", true); err != nil {
return err
}
}
dropDbLoc, err := statsFs.Abs("")
if err != nil {
return err
}
//log.Println("rm", dropDbLoc)
if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
return err
}
return nil
}
func (sc *StatsController) initStorage(ctx context.Context, fs filesys.Filesys) (*prollyStats, error) {
if sc.hdpEnv == nil {
return nil, fmt.Errorf("cannot initialize *prollKv, missing homeDirProvider")
}
params := make(map[string]interface{})
params[dbfactory.GRPCDialProviderParam] = env.NewGRPCDialProviderFromDoltEnv(sc.hdpEnv)
var urlPath string
u, err := earl.Parse(sc.pro.DbFactoryUrl())
if u.Scheme == dbfactory.MemScheme {
urlPath = path.Join(sc.pro.DbFactoryUrl(), dbfactory.DoltDataDir)
} else if u.Scheme == dbfactory.FileScheme {
urlPath = doltdb.LocalDirDoltDB
}
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
if err != nil {
return nil, err
}
var dEnv *env.DoltEnv
exists, isDir := statsFs.Exists("")
if !exists {
err := statsFs.MkDirs("")
if err != nil {
return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
}
dEnv = env.Load(ctx, sc.hdpEnv.GetUserHomeDir, statsFs, urlPath, "test")
err = dEnv.InitRepo(ctx, types.Format_Default, "stats", "stats@stats.com", env.DefaultInitBranch)
if err != nil {
return nil, err
}
} else if !isDir {
return nil, fmt.Errorf("file exists where the dolt stats directory should be")
} else {
dEnv = env.LoadWithoutDB(ctx, sc.hdpEnv.GetUserHomeDir, statsFs, "", doltversion.Version)
}
if err := dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params); err != nil {
return nil, err
}
deaf := dEnv.DbEaFactory(ctx)
tmpDir, err := dEnv.TempTableFilesDir()
if err != nil {
return nil, err
}
opts := editor.Options{
Deaf: deaf,
Tempdir: tmpDir,
}
statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
if err != nil {
return nil, err
}
m, err := dEnv.DbData(ctx).Ddb.GetStatistics(ctx)
if err == nil {
// use preexisting map
kd, vd := m.Descriptors()
return &prollyStats{
mu: sync.Mutex{},
destDb: statsDb,
kb: val.NewTupleBuilder(kd),
vb: val.NewTupleBuilder(vd),
m: m.Mutate(),
mem: NewMemStats(),
}, nil
}
return NewProllyStats(ctx, statsDb)
}
@@ -0,0 +1,78 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
// Package statspro provides a queue that manages table statistics
// management and access.
//
// At any given time there is one work generating thread, one scheduling
// thread, and one execution thread.
//
// The worker loop fetches the most recent session root,
// reads all of its databases/tables/ indexes, collects statistics
// for those objects, and updates the shared statistics state. Every
// cycle replaces the shared state.
//
// Work is delegated to the scheduler thread, which serializes
// issuer jobs with concurrent async requests, and rate limits sending
// jobs to the execution thread. The execution thread completes
// function callbacks.
//
// GC occurs within an update cycle. Through a cycle GC populates an
// in-memory cache with the complete and exclusive set of values of
// the new shared statistics object. Both are atomically swapped using
// a generation counter (which may or may not be necessary, but is one
// of several guards against surprising concurrent changes).
//
// Concurrent issuer threads are further restrained with a context list
// that at most one thread owns. There are two contexts, one for the
// thread and another for the specific update cycle. Listeners (like wait)
// use the second context to follow update cycles. Concurrent restarts
// cancel and replace the previous owner's contexts with their own. Atomic
// shared state swaps are likewise guarded on the issuer's context
// integrity.
//
// All stats are persisted within a single database in the `.dolt/stats`
// folder separate from user data. If there are multiple databases,
// one is selected by random as the storage target. If during
// initialization multiple databases have stats, one will be chosen
// by random as the target. If a database changes between server
// restarts, the storage stats will be useless but not impair regular
// operations because storage is only ever a best-effort
// content-addressed persistence layer; buckets will be regenerated if
// they are missing. If the database acting as a storage target is
// deleted, we swap the cache and write to a new storage target.
//
// The main data structures:
// - Table statistics map, that returns a list of table index statistics
// for a specific branch, database, and table name.
// - Object caches:
// - Bucket cache: Chunk addressed hash map. All provider histogram
// references point to objects in the bucket cache. Backed by a
// best-effort on-disk prolly.Map to make restarts faster.
// - Template cache: Table-schema/index addressed stats.Statistics object
// for a specific index.
// - Bound cache: Chunk addressed first row for an index histogram.
//
// The stats lifecycle can be controlled with:
// - dolt_stats_stop: clear queue and disable thread
// - dolt_stats_restart: clear queue, refresh queue, start thread
// - dolt_stats_purge: clear queue, refresh queue, clear cache,
// disable thread
// - dolt_stats_once: collect statistics once, ex: in sql-shell
// - dolt_stats_wait: block on a full queue cycle
// - dolt_stats_gc: block waiting for a GC signal
// - dolt_stats_flush: block waiting for a flush signal
//
@@ -1,290 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"fmt"
"sync"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/val"
)
type DoltStats struct {
Statistic *stats.Statistic
mu *sync.Mutex
// Chunks is a list of addresses for the histogram fanout level
Chunks []hash.Hash
// Active maps a chunk/bucket address to its position in
// the histogram. 1-indexed to differentiate from an empty
// field on disk
Active map[hash.Hash]int
Hist sql.Histogram
Tb *val.TupleBuilder
}
func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper {
return s
}
var _ sql.Statistic = (*DoltStats)(nil)
func (s *DoltStats) SetChunks(h []hash.Hash) {
s.mu.Lock()
defer s.mu.Unlock()
s.Chunks = h
}
func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic)
return &ret
}
func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic)
return &ret
}
func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic)
return &ret
}
func (s *DoltStats) WithRowCount(u uint64) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic)
return &ret
}
func (s *DoltStats) WithNullCount(u uint64) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic)
return &ret
}
func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic)
return &ret
}
func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic {
ret := *s
ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic)
return &ret
}
func (s *DoltStats) RowCount() uint64 {
return s.Statistic.RowCount()
}
func (s *DoltStats) DistinctCount() uint64 {
return s.Statistic.DistinctCount()
}
func (s *DoltStats) NullCount() uint64 {
return s.Statistic.NullCount()
}
func (s *DoltStats) AvgSize() uint64 {
return s.Statistic.AvgSize()
}
func (s *DoltStats) CreatedAt() time.Time {
return s.Statistic.CreatedAt()
}
func (s *DoltStats) Columns() []string {
return s.Statistic.Columns()
}
func (s *DoltStats) Types() []sql.Type {
return s.Statistic.Types()
}
func (s *DoltStats) Qualifier() sql.StatQualifier {
return s.Statistic.Qualifier()
}
func (s *DoltStats) IndexClass() sql.IndexClass {
return s.Statistic.IndexClass()
}
func (s *DoltStats) FuncDeps() *sql.FuncDepSet {
return s.Statistic.FuncDeps()
}
func (s *DoltStats) ColSet() sql.ColSet {
return s.Statistic.ColSet()
}
func (s *DoltStats) LowerBound() sql.Row {
return s.Statistic.LowerBound()
}
func NewDoltStats() *DoltStats {
return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
}
func (s *DoltStats) ToInterface() (interface{}, error) {
statVal, err := s.Statistic.ToInterface()
if err != nil {
return nil, err
}
ret := statVal.(map[string]interface{})
var hist sql.Histogram
for _, b := range s.Hist {
hist = append(hist, b)
}
histVal, err := hist.ToInterface()
if err != nil {
return nil, err
}
ret["statistic"].(map[string]interface{})["buckets"] = histVal
return ret, nil
}
func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
s.mu.Lock()
defer s.mu.Unlock()
ret := *s
ret.Hist = nil
for _, b := range h {
doltB, ok := b.(DoltBucket)
if !ok {
return nil, fmt.Errorf("invalid bucket type: %T, %s", b, h.DebugString())
}
ret.Hist = append(ret.Hist, doltB)
}
return &ret, nil
}
func (s *DoltStats) Histogram() sql.Histogram {
s.mu.Lock()
defer s.mu.Unlock()
return s.Hist
}
func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
hist, err := DoltHistFromSql(stat.Histogram(), stat.Types())
if err != nil {
return nil, err
}
ret := &DoltStats{
mu: &sync.Mutex{},
Hist: hist,
Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()),
Active: make(map[hash.Hash]int),
}
ret.Statistic.Fds = stat.FuncDeps()
ret.Statistic.Colset = stat.ColSet()
return ret, nil
}
func (s *DoltStats) UpdateActive() {
s.mu.Lock()
defer s.mu.Unlock()
newActive := make(map[hash.Hash]int)
for i, hash := range s.Chunks {
newActive[hash] = i
}
s.Active = newActive
}
type DoltHistogram []DoltBucket
type DoltBucket struct {
Bucket *stats.Bucket
Chunk hash.Hash
Created time.Time
}
func (d DoltBucket) RowCount() uint64 {
return d.Bucket.RowCount()
}
func (d DoltBucket) DistinctCount() uint64 {
return d.Bucket.DistinctCount()
}
func (d DoltBucket) NullCount() uint64 {
return d.Bucket.NullCount()
}
func (d DoltBucket) BoundCount() uint64 {
return d.Bucket.BoundCount()
}
func (d DoltBucket) UpperBound() sql.Row {
return d.Bucket.UpperBound()
}
func (d DoltBucket) McvCounts() []uint64 {
return d.Bucket.McvCounts()
}
func (d DoltBucket) Mcvs() []sql.Row {
return d.Bucket.Mcvs()
}
func DoltBucketChunk(b sql.HistogramBucket) hash.Hash {
return b.(DoltBucket).Chunk
}
func DoltBucketCreated(b sql.HistogramBucket) time.Time {
return b.(DoltBucket).Created
}
var _ sql.HistogramBucket = (*DoltBucket)(nil)
func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) {
ret := make(sql.Histogram, len(hist))
var err error
for i, b := range hist {
upperBound := make(sql.Row, len(b.UpperBound()))
for i, v := range b.UpperBound() {
upperBound[i], _, err = types[i].Convert(v)
if err != nil {
return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
}
}
mcvs := make([]sql.Row, len(b.Mcvs()))
for i, mcv := range b.Mcvs() {
for _, v := range mcv {
conv, _, err := types[i].Convert(v)
if err != nil {
return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
}
mcvs[i] = append(mcvs[i], conv)
}
}
ret[i] = DoltBucket{
Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs).(*stats.Bucket),
}
}
return ret, nil
}
@@ -1,4 +1,4 @@
// Copyright 2024 Dolthub, Inc.
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,10 +15,6 @@
package statspro
import (
"context"
"fmt"
"strings"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
@@ -26,67 +22,33 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
func NewStatsInitDatabaseHook(
statsProv *Provider,
ctxFactory func(ctx context.Context) (*sql.Context, error),
bThreads *sql.BackgroundThreads,
) sqle.InitDatabaseHook {
func NewInitDatabaseHook(sc *StatsController) sqle.InitDatabaseHook {
return func(
ctx *sql.Context,
pro *sqle.DoltDatabaseProvider,
_ *sqle.DoltDatabaseProvider,
name string,
denv *env.DoltEnv,
db dsess.SqlDatabase,
) error {
dbName := strings.ToLower(db.Name())
if statsDb, ok := statsProv.getStatDb(dbName); !ok {
statsDb, err := statsProv.sf.Init(ctx, db, statsProv.pro, denv.FS, env.GetCurrentUserHomeDir)
if err != nil {
ctx.GetLogger().Debugf("statistics load error: %s", err.Error())
return nil
}
statsProv.setStatDb(dbName, statsDb)
} else {
dSess := dsess.DSessFromSess(ctx.Session)
for _, br := range statsDb.Branches() {
branchQDbName := BranchQualifiedDatabase(dbName, br)
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
if err != nil {
ctx.GetLogger().Logger.Errorf("branch not found: %s", br)
continue
}
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
if !ok {
return fmt.Errorf("branch/database not found: %s", branchQDbName)
}
if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil {
return err
} else if ok {
if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil {
return err
}
}
}
ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName)
if sc.hdpEnv == nil {
sc.mu.Lock()
sc.hdpEnv = denv
sc.mu.Unlock()
}
ctx.GetLogger().Debugf("statistics refresh: initialize %s", name)
return statsProv.InitAutoRefresh(ctxFactory, name, bThreads)
sqlDb, ok := db.(sqle.Database)
if !ok {
return nil
}
// call should only fail if backpressure in secondary queue
return sc.AddFs(ctx, sqlDb, denv.FS, true)
}
}
func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook {
func NewDropDatabaseHook(sc *StatsController) sqle.DropDatabaseHook {
return func(ctx *sql.Context, name string) {
statsProv.CancelRefreshThread(name)
if err := statsProv.DropDbStats(ctx, name, false); err != nil {
if err := sc.DropDbStats(ctx, name, false); err != nil {
ctx.GetLogger().Debugf("failed to close stats database: %s", err)
}
if db, ok := statsProv.getStatDb(name); ok {
if err := db.Close(); err != nil {
ctx.GetLogger().Debugf("failed to close stats database: %s", err)
}
delete(statsProv.statDbs, name)
}
}
}
@@ -1,75 +0,0 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/hash"
)
// Database is a backing store for a collection of DoltStats.
// Each stats database tracks a user database, with multiple
// branches potentially each having their own statistics.
type Database interface {
// ListStatQuals returns the list of index statistics for a branch.
ListStatQuals(branch string) []sql.StatQualifier
// LoadBranchStats starts tracking a specific branch's statistics.
LoadBranchStats(ctx *sql.Context, branch string) error
// DeleteBranchStats removes references to in memory index statistics.
// If |flush| is true delete the data from storage.
DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error
// GetStat returns a branch's index statistics.
GetStat(branch string, qual sql.StatQualifier) (*DoltStats, bool)
//SetStat bulk replaces the statistic, deleting any previous version
SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *DoltStats) error
//DeleteStats deletes a list of index statistics.
DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier)
// ReplaceChunks is an update interface that lets a stats implementation
// decide how to edit stats for a stats refresh.
ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error
// Flush instructs the database to sync any partial state to disk
Flush(ctx context.Context, branch string) error
// Close finalizes any file references.
Close() error
// SetTableHash updates the most recently tracked table stats table hash
SetTableHash(branch, tableName string, h hash.Hash)
// GetTableHash returns the most recently tracked table stats table hash
GetTableHash(branch, tableName string) hash.Hash
// SetSchemaHash updates the most recently stored table stat's schema hash
SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error
// GetSchemaHash returns the schema hash for the latest stored statistics
GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error)
// Branches returns the set of branches with tracked statistics databases
Branches() []string
// SchemaChange returns false if any table schema in the session
// root is incompatible with the latest schema used to create a stored
// set of statistics.
SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error)
}
// StatsFactory instances construct statistic databases.
type StatsFactory interface {
// Init gets a reference to the stats database for a dolt database
// rooted at the given filesystem. It will create the database if
// it does not exist.
Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (Database, error)
}
@@ -0,0 +1,410 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package jobqueue
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/dolthub/dolt/go/libraries/utils/circular"
)
// A SerialQueue is a job queue which runs one job at a time. Jobs are
// run in the order they are submitted, with the exception that every
// interrupt job is run before any normal priority job.
//
// A SerialQueue can be paused, in which case it will accept new
// submissions, but will not run them until it is started again.
//
// A SerialQueue can be purged, which deletes any pending jobs from
// it.
//
// A SerialQueue can be stopped, in which case it will not accept new
// submissions and no pending work will be run. Stopping a queue does
// not purge it, but it is easy for a caller to stop and purge the
// queue.
//
// A stopped or paused SerialQueue can be started, which will cause it
// to start running submitted jobs again, including any unpurged jobs
// which were pending when it was stopped or paused.
//
// A SerialQueue runs background threads to coordinate its
// behavior. These background threads are launched with a `Context`
// supplied to its |Run| method. If that `Context` ever becomes
// `Done`, the SerialQueue termainally enters a completed state.
//
// In general, jobs running on the queue should not block indefinitely
// and should be very careful about any synchronization. It is safe
// for jobs within the queue to call DoAsync, InterruptAsync, Stop,
// Pause, Purge and Start on the queue itself. It is a deadlock for a
// job within the queue to perform a DoSync or InterruptSync on the
// queue itself, although that deadlock may be resolved if the
// provided |ctx| ends up |Done|.
type SerialQueue struct {
running atomic.Bool
// If the queue is terminally completed, this will be closed.
// Submissions to the queue scheduler select on this channel
// to return errors if the scheduler is no longer accepting
// work.
completed chan struct{}
runnerCh chan work
schedCh chan schedReq
errCb func(error)
}
// |work| represents work to be run on the runner goroutine.
type work struct {
// The function to call.
f func() error
// The channel to close after the work is run.
done chan struct{}
// Update worker rate
newRate time.Duration
}
type schedState int
const (
// When scheduler is running, it is willing to accept new work
// and to give work to the work thread.
schedState_Running schedState = iota
// When scheduler is paused, it is willing to accept new work
// but it does not give work to the work thread.
schedState_Paused
// When scheduler is stopped, it does not accept new work
// and it does not give work to the work thread.
schedState_Stopped
)
type schedReqType int
const (
schedReqType_Enqueue schedReqType = iota
schedReqType_Purge
schedReqType_Start
schedReqType_Pause
schedReqType_Stop
)
type schedPriority int
const (
schedPriority_Normal schedPriority = iota
schedPriority_High
)
// Incoming message for the scheduler thread.
type schedReq struct {
reqType schedReqType
// Always set, the scheduler's response is
// sent through this channel. The send
// must never block.
resp chan schedResp
// Set when |reqType| is Enqueue
pri schedPriority
// Set when |reqType| is Enqueue
work work
}
type schedResp struct {
err error
}
var ErrStoppedQueue = errors.New("stopped queue: cannot submit work to a stopped queue.")
var ErrCompletedQueue = errors.New("completed queue: the queue is no longer running.")
// Create a new serial queue. All of the methods on the returned
// SerialQueue block indefinitely until its |Run| method is called.
func NewSerialQueue() *SerialQueue {
return &SerialQueue{
completed: make(chan struct{}),
runnerCh: make(chan work),
schedCh: make(chan schedReq),
}
}
func (s *SerialQueue) WithErrorCb(errCb func(error)) *SerialQueue {
s.errCb = errCb
return s
}
// Run the serial queue's background threads with this |ctx|. If the
// |ctx| ever becomes |Done|, the queue enters a terminal completed
// state. It is an error to call this function more than once.
func (s *SerialQueue) Run(ctx context.Context) {
if !s.running.CompareAndSwap(false, true) {
panic("Cannot run a SerialQueue more than once.")
}
defer close(s.completed)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
s.runScheduler(ctx)
}()
go func() {
defer wg.Done()
s.runRunner(ctx)
}()
wg.Wait()
}
// Start the queue. The queue can be in any state, including already started.
func (s *SerialQueue) Start() error {
return s.makeReq(schedReq{
reqType: schedReqType_Start,
resp: make(chan schedResp, 1),
})
}
// Pause the queue. The queue can be in any state, including already
// paused. Note that pausing the queue does not block on any
// currently running job to complete. A pattern to pause the queue
// with a guarantee that nothing is currently running is:
//
// s.InterruptSync(context.Background(), func() { s.Pause() })
func (s *SerialQueue) Pause() error {
return s.makeReq(schedReq{
reqType: schedReqType_Pause,
resp: make(chan schedResp, 1),
})
}
// Stop the queue. The queue can be in any state, including already
// stopped. Note that stopping the queue does not block on any
// currently running job to complete.
func (s *SerialQueue) Stop() error {
return s.makeReq(schedReq{
reqType: schedReqType_Stop,
resp: make(chan schedResp, 1),
})
}
// Purge the queue. All pending jobs will be dropped.
func (s *SerialQueue) Purge() error {
return s.makeReq(schedReq{
reqType: schedReqType_Purge,
resp: make(chan schedResp, 1),
})
}
func (s *SerialQueue) NewRateLimit(rate time.Duration) error {
return s.makeReq(schedReq{
reqType: schedReqType_Enqueue,
pri: schedPriority_High,
work: work{
f: func() error { return nil },
done: make(chan struct{}),
newRate: rate,
},
resp: make(chan schedResp, 1),
})
}
// Run a high priority job on the SerialQueue, blocking for its completion.
// If done against a Paused queue, this could block indefinitely. The
// block for completion is gated on the |ctx|.
func (s *SerialQueue) InterruptSync(ctx context.Context, f func() error) error {
w, err := s.submitWork(schedPriority_High, f)
if err != nil {
return err
}
select {
case <-w.done:
return nil
case <-ctx.Done():
return context.Cause(ctx)
case <-s.completed:
return ErrCompletedQueue
}
}
// Run a normal priority job on the SerialQueue, blocking for its completion.
// When done against a paused queue, this can block indefinitely.
func (s *SerialQueue) DoSync(ctx context.Context, f func() error) error {
w, err := s.submitWork(schedPriority_Normal, f)
if err != nil {
return err
}
select {
case <-w.done:
return nil
case <-ctx.Done():
return context.Cause(ctx)
case <-s.completed:
return ErrCompletedQueue
}
}
// Run a high priority job asynchronously on the queue. Returns once the
// job is accepted.
func (s *SerialQueue) InterruptAsync(f func() error) error {
_, err := s.submitWork(schedPriority_High, f)
if err != nil {
return err
}
return nil
}
// Run a normal priority job asynchronously on the queue. Returns once the
// job is accepted.
func (s *SerialQueue) DoAsync(f func() error) error {
_, err := s.submitWork(schedPriority_Normal, f)
if err != nil {
return err
}
return nil
}
// Helper function to submit work. Returns the work submitted, if it
// was successful, and an error otherwise.
func (s *SerialQueue) submitWork(pri schedPriority, f func() error) (work, error) {
w := work{
f: f,
done: make(chan struct{}),
}
err := s.makeReq(schedReq{
reqType: schedReqType_Enqueue,
pri: pri,
work: w,
resp: make(chan schedResp, 1),
})
if err != nil {
return work{}, err
}
return w, nil
}
func (s *SerialQueue) makeReq(req schedReq) error {
select {
case s.schedCh <- req:
resp := <-req.resp
return resp.err
case <-s.completed:
return ErrCompletedQueue
}
}
// Read off the input channels and maintain queues of pending work.
// Deliver that work to the runner channel if it is desired.
func (s *SerialQueue) runScheduler(ctx context.Context) {
state := schedState_Running
normalQ := circular.NewBuff[work](16)
highQ := circular.NewBuff[work](16)
for {
var sendWorkCh chan work
var sendWork work
var sentWorkCallback func()
if state == schedState_Running {
if highQ.Len() > 0 {
sendWorkCh = s.runnerCh
sendWork = highQ.Front()
sentWorkCallback = highQ.Pop
} else if normalQ.Len() > 0 {
sendWorkCh = s.runnerCh
sendWork = normalQ.Front()
sentWorkCallback = normalQ.Pop
}
}
select {
case msg := <-s.schedCh:
switch msg.reqType {
case schedReqType_Enqueue:
if state == schedState_Stopped {
msg.resp <- schedResp{
err: ErrStoppedQueue,
}
} else {
if msg.pri == schedPriority_High {
highQ.Push(msg.work)
} else {
normalQ.Push(msg.work)
}
msg.resp <- schedResp{
err: nil,
}
}
case schedReqType_Purge:
highQ = circular.NewBuff[work](highQ.Cap())
normalQ = circular.NewBuff[work](normalQ.Cap())
msg.resp <- schedResp{
err: nil,
}
case schedReqType_Start:
state = schedState_Running
msg.resp <- schedResp{
err: nil,
}
case schedReqType_Pause:
state = schedState_Paused
msg.resp <- schedResp{
err: nil,
}
case schedReqType_Stop:
state = schedState_Stopped
msg.resp <- schedResp{
err: nil,
}
}
case sendWorkCh <- sendWork:
// Pop from queue the work came from.
sentWorkCallback()
case <-ctx.Done():
return
}
}
}
// Read off the runner channel and run the submitted work.
func (s *SerialQueue) runRunner(ctx context.Context) {
ticker := time.NewTicker(1)
for {
select {
case w := <-s.runnerCh:
if w.newRate > 0 {
ticker.Reset(w.newRate)
}
// do not run jobs more frequently than the ticker rate
select {
case <-ticker.C:
case <-ctx.Done():
}
func() {
var err error
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("serialQueue panicked running work: %s", r)
}
if err != nil {
s.errCb(err)
}
}()
err = w.f()
}()
close(w.done)
case <-ctx.Done():
return
}
}
}
@@ -0,0 +1,361 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package jobqueue
import (
"context"
"os"
"runtime"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestSerialQueue(t *testing.T) {
if runtime.GOOS == "windows" && os.Getenv("CI") != "" {
t.Skip("Racy on Windows CI")
}
t.Run("CanceledRunContext", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
queue := NewSerialQueue()
// This should return.
queue.Run(ctx)
// Now all methods should return ErrCompletedQueue.
assert.ErrorIs(t, queue.Start(), ErrCompletedQueue)
assert.ErrorIs(t, queue.Pause(), ErrCompletedQueue)
assert.ErrorIs(t, queue.Stop(), ErrCompletedQueue)
assert.ErrorIs(t, queue.DoSync(context.Background(), func() error { return nil }), ErrCompletedQueue)
assert.ErrorIs(t, queue.DoAsync(func() error { return nil }), ErrCompletedQueue)
assert.ErrorIs(t, queue.InterruptSync(context.Background(), func() error { return nil }), ErrCompletedQueue)
assert.ErrorIs(t, queue.InterruptAsync(func() error { return nil }), ErrCompletedQueue)
})
t.Run("StartsRunning", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
var ran bool
err := queue.DoSync(context.Background(), func() error {
ran = true
return nil
})
assert.NoError(t, err)
assert.True(t, ran, "the sync task ran.")
cancel()
wg.Wait()
})
t.Run("StoppedQueueReturnsError", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
assert.NoError(t, queue.Stop())
err := queue.DoSync(context.Background(), func() error { return nil })
assert.ErrorIs(t, err, ErrStoppedQueue)
cancel()
wg.Wait()
})
t.Run("PausedQueueDoesNotRun", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
assert.NoError(t, queue.Pause())
var ran bool
for i := 0; i < 16; i++ {
err := queue.DoAsync(func() error {
ran = true
return nil
})
assert.NoError(t, err)
}
cancel()
wg.Wait()
assert.False(t, ran, "work did not run on the paused queue.")
})
t.Run("StartingPausedQueueRunsIt", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
assert.NoError(t, queue.Pause())
var ran bool
for i := 0; i < 16; i++ {
err := queue.DoAsync(func() error {
ran = true
return nil
})
assert.NoError(t, err)
}
assert.NoError(t, queue.Start())
err := queue.DoSync(context.Background(), func() error { return nil })
assert.NoError(t, err)
assert.True(t, ran, "work ran after the paused queue was started.")
cancel()
wg.Wait()
})
t.Run("InterruptWorkRunsFirst", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
assert.NoError(t, queue.Pause())
var cnt int
queue.DoAsync(func() error {
assert.Equal(t, cnt, 2)
cnt += 1
return nil
})
queue.DoAsync(func() error {
assert.Equal(t, cnt, 3)
cnt += 1
return nil
})
queue.InterruptAsync(func() error {
assert.Equal(t, cnt, 0)
cnt += 1
return nil
})
queue.InterruptAsync(func() error {
assert.Equal(t, cnt, 1)
cnt += 1
return nil
})
assert.NoError(t, queue.Start())
assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil }))
assert.Equal(t, cnt, 4)
cancel()
wg.Wait()
})
t.Run("StopFromQueue", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
// block until queue is running
assert.NoError(t, queue.DoSync(ctx, func() error {
return nil
}))
var cnt int
for i := 0; i < 16; i++ {
// Some of these calls may error, since the queue
// will be stopped asynchronously.
queue.DoAsync(func() error {
cnt += 1
assert.NoError(t, queue.Stop())
return nil
})
}
assert.Equal(t, cnt, 1)
cancel()
wg.Wait()
})
t.Run("PauseFromQueue", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
// block until queue is running
assert.NoError(t, queue.DoSync(ctx, func() error {
return nil
}))
done := make(chan struct{})
for i := 0; i < 16; i++ {
err := queue.DoAsync(func() error {
close(done)
assert.NoError(t, queue.Pause())
return nil
})
assert.NoError(t, err)
}
<-done
cancel()
wg.Wait()
})
t.Run("PurgeFromQueue", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
queue.Run(ctx)
return nil
}()
assert.NoError(t, queue.Pause())
var cnt int
didRun := make(chan struct{})
for i := 0; i < 16; i++ {
err := queue.DoAsync(func() error {
cnt += 1
assert.NoError(t, queue.Purge())
close(didRun)
return nil
})
assert.NoError(t, err)
}
assert.NoError(t, queue.Start())
<-didRun
assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil }))
assert.Equal(t, cnt, 1)
cancel()
wg.Wait()
})
t.Run("DoSyncInQueueDeadlockWithContext", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
start := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
close(start)
queue.Run(ctx)
return nil
}()
<-start
var cnt int
err := queue.DoSync(context.Background(), func() error {
cnt += 1
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
err := queue.DoSync(ctx, func() error {
cnt += 1
return nil
})
assert.ErrorIs(t, err, context.DeadlineExceeded)
return nil
})
assert.NoError(t, err)
assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil }))
// Both tasks eventually ran...
assert.Equal(t, cnt, 2)
cancel()
wg.Wait()
})
t.Run("SyncReturnsErrCompletedQueueAfterWorkAccepted", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
queue := NewSerialQueue()
start := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go func() error {
defer wg.Done()
close(start)
queue.Run(ctx)
return nil
}()
<-start
queue.Pause()
var err error
var ran bool
wg.Add(1)
go func() error {
defer wg.Done()
err = queue.InterruptSync(context.Background(), func() error {
ran = true
return nil
})
return nil
}()
wg.Add(1)
go func() error {
defer wg.Done()
time.Sleep(100 * time.Millisecond)
queue.Stop()
return nil
}()
cancel()
wg.Wait()
assert.ErrorIs(t, err, ErrCompletedQueue)
assert.False(t, ran, "the interrupt task never ran.")
})
t.Run("RateLimitWorkThroughput", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
queue := NewSerialQueue()
running := make(chan struct{})
go func() {
close(running)
queue.Run(ctx)
}()
<-running
// first will run because timeout > job rate
ran := false
subCtx, cancel2 := context.WithTimeout(ctx, 5*time.Millisecond)
defer cancel2()
err := queue.DoSync(subCtx, func() error {
ran = true
return nil
})
assert.NoError(t, err)
assert.True(t, ran, "the interrupt task never ran.")
// second timeout < jobrate, will fail
queue.NewRateLimit(10 * time.Millisecond)
ran = false
subCtx, cancel3 := context.WithTimeout(ctx, 5*time.Millisecond)
defer cancel3()
err = queue.DoSync(subCtx, func() error {
ran = true
return nil
})
assert.ErrorIs(t, err, context.DeadlineExceeded)
assert.False(t, ran, "the interrupt task never ran.")
})
}
@@ -0,0 +1,259 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"fmt"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
var ErrStatsIssuerPaused = fmt.Errorf("stats issuer is paused")
type listenerEvent uint16
const (
leUnknown = listenerEvent(iota)
leSwap listenerEvent = 1 << 0
leStop listenerEvent = 1 << 1
leGc listenerEvent = 1 << 2
leFlush listenerEvent = 1 << 3
)
func (sc *StatsController) signalListener(s listenerEvent) {
keep := 0
for i, l := range sc.listeners {
if (l.target|leStop)&s > 0 {
l.c <- s
close(l.c)
} else {
sc.listeners[keep] = sc.listeners[i]
keep++
}
}
sc.listeners = sc.listeners[:keep]
}
func (sc *StatsController) newThreadCtx(ctx context.Context) context.Context {
sc.mu.Lock()
defer sc.mu.Unlock()
newCtx, cancel := context.WithCancel(ctx)
if sc.activeCtxCancel != nil {
sc.activeCtxCancel()
}
sc.signalListener(leStop)
sc.activeCtxCancel = cancel
return newCtx
}
type listener struct {
target listenerEvent
c chan listenerEvent
}
func (sc *StatsController) addListener(e listenerEvent) (chan listenerEvent, error) {
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.activeCtxCancel == nil {
return nil, ErrStatsIssuerPaused
}
l := listener{target: e, c: make(chan listenerEvent, 1)}
sc.listeners = append(sc.listeners, l)
return l.c, nil
}
func (sc *StatsController) Stop() {
// xxx: do not pause |sq|, analyze jobs still need to run
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.activeCtxCancel != nil {
sc.activeCtxCancel()
sc.activeCtxCancel = nil
}
sc.signalListener(leStop)
return
}
// RefreshFromSysVars reads the environment variables and updates controller
// parameters. If the queue is not started this will hang.
func (sc *StatsController) RefreshFromSysVars() {
_, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly)
sc.SetMemOnly(memOnly.(int8) == 1)
_, gcEnabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCEnabled)
sc.SetEnableGc(gcEnabled.(int8) == 1)
typ, jobI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsJobInterval)
_, gcI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCInterval)
jobInterval, _, _ := typ.GetType().Convert(jobI)
gcInterval, _, _ := typ.GetType().Convert(gcI)
sc.SetTimers(
jobInterval.(int64)*int64(time.Millisecond),
gcInterval.(int64)*int64(time.Millisecond),
)
}
func (sc *StatsController) Restart() error {
select {
case <-sc.closed:
return fmt.Errorf("StatsController is closed")
default:
}
sc.sq.Start()
sc.RefreshFromSysVars()
done := make(chan struct{})
if err := sc.bgThreads.Add("stats_worker", func(ctx context.Context) {
ctx = sc.newThreadCtx(ctx)
close(done)
err := sc.runWorker(ctx)
if err != nil {
sc.logger.Errorf("stats stopped: %s", err.Error())
}
}); err != nil {
return err
}
// only return after latestCtx updated
<-done
return nil
}
func (sc *StatsController) RunQueue() {
if err := sc.bgThreads.Add("stats_scheduler", sc.sq.Run); err != nil {
sc.descError("start scheduler", err)
}
// block on queue starting
sc.sq.DoSync(context.Background(), func() error { return nil })
return
}
// Init should only be called once
func (sc *StatsController) Init(ctx context.Context, pro *sqle.DoltDatabaseProvider, ctxGen ctxFactory, bthreads *sql.BackgroundThreads, dbs []sql.Database) error {
sc.pro = pro
sc.ctxGen = ctxGen
sc.bgThreads = bthreads
sc.RunQueue()
sqlCtx, err := sc.ctxGen(ctx)
if err != nil {
return err
}
defer sql.SessionEnd(sqlCtx.Session)
sql.SessionCommandBegin(sqlCtx.Session)
defer sql.SessionCommandEnd(sqlCtx.Session)
for i, db := range dbs {
if db, ok := db.(sqle.Database); ok { // exclude read replica dbs
fs, err := sc.pro.FileSystemForDatabase(db.AliasedName())
if err != nil {
return err
}
if err := sc.AddFs(sqlCtx, db, fs, false); err != nil {
return err
}
if i > 0 || sc.memOnly {
continue
}
// attempt to access previously written stats
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
if err != nil {
return err
}
exists, isDir := statsFs.Exists("")
if exists && isDir {
newKv, err := sc.initStorage(ctx, fs)
if err == nil {
sc.kv = newKv
sc.statsBackingDb = fs
continue
} else {
path, _ := statsFs.Abs("")
sc.descError("failed to reboot stats from: "+path, err)
}
}
// otherwise wipe and create new stats dir
if err := sc.lockedRotateStorage(ctx); err != nil {
return err
}
}
}
return nil
}
func (sc *StatsController) waitForSignal(ctx context.Context, signal listenerEvent, cnt int) (err error) {
for cnt > 0 {
var l chan listenerEvent
l, err = sc.addListener(signal)
if err != nil {
return err
}
select {
case <-ctx.Done():
return context.Cause(ctx)
case <-l:
cnt--
}
}
return nil
}
func (sc *StatsController) WaitForSync(ctx context.Context) (err error) {
// wait for 2 cycles because first completion is usually a stale context
return sc.waitForSignal(ctx, leSwap, 2)
}
func (sc *StatsController) WaitForFlush(ctx *sql.Context) error {
sc.mu.Lock()
memOnly := sc.memOnly
sc.mu.Unlock()
if memOnly {
return fmt.Errorf("memory only statistics will not flush")
}
return sc.waitForSignal(ctx, leFlush, 1)
}
func (sc *StatsController) Gc(ctx *sql.Context) error {
sc.setDoGc(true)
return sc.waitForSignal(ctx, leGc, 1)
}
func (sc *StatsController) Close() {
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.activeCtxCancel != nil {
sc.activeCtxCancel()
sc.activeCtxCancel = nil
sc.sq.InterruptAsync(func() error {
return sc.sq.Stop()
})
}
sc.signalListener(leStop)
close(sc.closed)
return
}
@@ -0,0 +1,250 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"sync"
"testing"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/stretchr/testify/require"
"golang.org/x/sync/errgroup"
)
func TestListening(t *testing.T) {
bthreads := sql.NewBackgroundThreads()
defer bthreads.Shutdown()
t.Run("ClosedDoesNotStart", func(t *testing.T) {
sc := newStatsCoord(bthreads)
sc.Close()
require.Error(t, sc.Restart())
require.Nil(t, sc.activeCtxCancel)
})
t.Run("IsStoppable", func(t *testing.T) {
sc := newStatsCoord(bthreads)
eg := errgroup.Group{}
ctx := sc.newThreadCtx(context.Background())
eg.Go(func() error {
return sc.runWorker(ctx)
})
require.NotNil(t, sc.activeCtxCancel)
l, err := sc.addListener(leSwap)
require.NoError(t, err)
<-l
select {
case <-ctx.Done():
t.Fatal("expected latest thread ctx to be active")
default:
}
sc.Stop()
<-ctx.Done()
require.ErrorIs(t, eg.Wait(), context.Canceled)
})
t.Run("StopsAreIdempotent", func(t *testing.T) {
sc := newStatsCoord(bthreads)
eg := errgroup.Group{}
ctx := sc.newThreadCtx(context.Background())
eg.Go(func() error {
return sc.runWorker(ctx)
})
sc.Stop()
sc.Stop()
sc.Stop()
sc.Stop()
<-ctx.Done()
require.ErrorIs(t, eg.Wait(), context.Canceled)
})
t.Run("IsRestartable", func(t *testing.T) {
sc := newStatsCoord(bthreads)
eg := errgroup.Group{}
ctx1 := sc.newThreadCtx(context.Background())
eg.Go(func() error {
return sc.runWorker(ctx1)
})
ctx2 := sc.newThreadCtx(context.Background())
eg.Go(func() error {
return sc.runWorker(ctx2)
})
ctx3 := sc.newThreadCtx(context.Background())
eg.Go(func() error {
return sc.runWorker(ctx3)
})
<-ctx1.Done()
<-ctx2.Done()
sc.Stop()
<-ctx3.Done()
require.ErrorIs(t, eg.Wait(), context.Canceled)
})
t.Run("ConcurrentStartStopsAreOk", func(t *testing.T) {
sc := newStatsCoord(bthreads)
wg := sync.WaitGroup{}
wg.Add(2)
go func() {
defer wg.Done()
for range 20 {
require.NoError(t, sc.Restart())
l, err := sc.addListener(leSwap)
if err != nil {
require.ErrorIs(t, err, ErrStatsIssuerPaused)
continue
}
select {
case <-l:
}
}
}()
go func() {
defer wg.Done()
for range 20 {
sc.Stop()
l, err := sc.addListener(leSwap)
if err != nil {
require.ErrorIs(t, err, ErrStatsIssuerPaused)
continue
}
select {
case <-l:
case <-time.Tick(10 * time.Millisecond):
print()
}
}
}()
wg.Wait()
})
t.Run("ListenForSwap", func(t *testing.T) {
sc := newStatsCoord(bthreads)
require.NoError(t, sc.Restart())
l, err := sc.addListener(leSwap)
require.NoError(t, err)
select {
case e := <-l:
require.True(t, (leSwap&e) > 0, "expected success or gc signal")
}
})
t.Run("ListenForStop", func(t *testing.T) {
sc := newStatsCoord(bthreads)
require.NoError(t, sc.Restart())
var l chan listenerEvent
err := sc.sq.DoSync(context.Background(), func() error {
// do this in serial queue to make sure we don't race
// with swap
var err error
require.NoError(t, err)
l, err = sc.addListener(leUnknown)
require.NoError(t, err)
sc.Stop()
return nil
})
require.NoError(t, err)
select {
case e := <-l:
require.Equal(t, e, leStop)
default:
t.Fatal("expected listener to recv stop")
}
})
t.Run("ListenerFailsIfStopped", func(t *testing.T) {
sc := newStatsCoord(bthreads)
require.NoError(t, sc.Restart())
sc.Stop()
_, err := sc.addListener(leUnknown)
require.ErrorIs(t, err, ErrStatsIssuerPaused)
})
t.Run("ListenerFailsIfClosed", func(t *testing.T) {
sc := newStatsCoord(bthreads)
sc.Close()
require.Error(t, sc.Restart())
_, err := sc.addListener(leUnknown)
require.ErrorIs(t, err, ErrStatsIssuerPaused)
})
t.Run("WaitBlocksOnStatsCollection", func(t *testing.T) {
sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true)
require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)"))
require.NoError(t, sc.Restart())
done := make(chan struct{})
wg := sync.WaitGroup{}
wg.Add(2)
err := sc.sq.DoAsync(func() error {
defer wg.Done()
<-done
return nil
})
require.NoError(t, err)
go func() {
defer wg.Done()
defer close(done)
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
err := sc.waitForSignal(ctx, leSwap, 1)
require.ErrorIs(t, err, context.DeadlineExceeded)
}()
wg.Wait()
})
t.Run("WaitReturnsIfStoppedBefore", func(t *testing.T) {
sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true)
require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)"))
require.NoError(t, sc.Restart())
done := make(chan struct{})
wg := sync.WaitGroup{}
wg.Add(2)
err := sc.sq.DoAsync(func() error {
defer wg.Done()
<-done
return nil
})
require.NoError(t, err)
go func() {
defer wg.Done()
defer close(done)
sc.Stop()
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
err := sc.waitForSignal(ctx, leSwap, 1)
require.ErrorIs(t, err, ErrStatsIssuerPaused)
}()
wg.Wait()
})
t.Run("WaitHangsUntilCycleCompletes", func(t *testing.T) {
sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true)
require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)"))
require.NoError(t, sc.Restart())
done := make(chan struct{})
wg := sync.WaitGroup{}
wg.Add(2)
err := sc.sq.DoAsync(func() error {
defer wg.Done()
<-done
return nil
})
require.NoError(t, err)
go func() {
defer wg.Done()
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
err := sc.waitForSignal(ctx, leSwap, 1)
require.NoError(t, err)
}()
close(done)
wg.Wait()
})
}
@@ -0,0 +1,86 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
type StatsNoop struct{}
func (s StatsNoop) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
return nil, nil
}
func (s StatsNoop) AnalyzeTable(ctx *sql.Context, table sql.Table, db string) error {
return nil
}
func (s StatsNoop) SetStats(ctx *sql.Context, stats sql.Statistic) error {
return nil
}
func (s StatsNoop) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
return nil, false
}
func (s StatsNoop) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
return nil
}
func (s StatsNoop) DropDbStats(ctx *sql.Context, db string, flush bool) error {
return nil
}
func (s StatsNoop) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
return 0, nil
}
func (s StatsNoop) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
return 0, nil
}
func (s StatsNoop) CancelRefreshThread(string) {
return
}
func (s StatsNoop) StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error {
return nil
}
func (s StatsNoop) ThreadStatus(string) string {
return "stats disabled"
}
func (s StatsNoop) Prune(ctx *sql.Context) error {
return nil
}
func (s StatsNoop) Purge(ctx *sql.Context) error {
return nil
}
func (s StatsNoop) WaitForSync(ctx *sql.Context) error {
return nil
}
func (s StatsNoop) CollectOnce(ctx *sql.Context) (string, error) {
return "", nil
}
var _ sql.StatsProvider = StatsNoop{}
@@ -0,0 +1,731 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"encoding/json"
"log"
"strconv"
"testing"
"github.com/dolthub/go-mysql-server/sql"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
)
type scriptTest struct {
name string
setup []string
assertions []assertion
}
type assertion struct {
query string
res []sql.Row
err string
}
func TestStatScripts(t *testing.T) {
threads := sql.NewBackgroundThreads()
defer threads.Shutdown()
scripts := []scriptTest{
{
name: "track updates",
setup: []string{
"create table xy (x int primary key, y varchar(16), key (y,x))",
"insert into xy values (0,'zero'), (1, 'one')",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
},
{
query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(9)}},
},
{
query: "update xy set y = 2 where x between 100 and 800",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(9)}},
},
},
},
{
name: "track deletes",
setup: []string{
"create table xy (x int primary key, y varchar(16), key (y,x))",
"insert into xy values (0,'zero'), (1, 'one')",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
},
{
query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(9)}},
},
{
query: "delete from xy where x > 600",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(5)}},
},
},
},
{
name: "ddl table",
setup: []string{
"create table xy (x int primary key, y varchar(16), key (y,x))",
"insert into xy values (0,'0'), (1,'0'), (2,'0')",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
{
query: "truncate table xy",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(0)}},
},
{
query: "insert into xy values (0,'0'), (1,'0'), (2,'0')",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
{
query: "drop table xy",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(0)}},
},
},
},
{
name: "ddl index",
setup: []string{
"create table xy (x int primary key, y varchar(16), key (y,x))",
"insert into xy values (0,'0'), (1,'0'), (2,'0')",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
{
query: "alter table xy drop index y",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(1)}},
},
{
query: "alter table xy add index yx (y,x)",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
{
query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
res: []sql.Row{{"varchar(16),int", "0,2"}},
},
{
query: "alter table xy modify column y int",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
res: []sql.Row{{"int,int", "0,2"}},
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
},
},
{
name: "mcv counts",
setup: []string{
"create table xy (x int primary key, y int, key (y,x))",
"alter table xy add index y2 (y)",
"alter table xy add index x2 (x,y)",
"insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)",
},
assertions: []assertion{
{
query: "select mcv1, mcv2, mcv_counts from dolt_statistics where index_name = 'y2'",
res: []sql.Row{{"1", "0", "4,6"}},
},
{
query: "select mcv_counts from dolt_statistics where index_name = 'y'",
res: []sql.Row{{""}},
},
{
query: "select mcv_counts from dolt_statistics where index_name = 'x2'",
res: []sql.Row{{""}},
},
},
},
{
name: "vector index",
setup: []string{
"create table xy (x int primary key, y json, vector key(y))",
"insert into xy values (0, '0'), (1, '1'), (2, '2'), (3, NULL), (4, NULL)",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "xy", "primary"}},
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 1,
Backing: "mydb",
Active: true,
StorageBucketCnt: 1,
CachedBucketCnt: 1,
CachedBoundCnt: 1,
CachedTemplateCnt: 1,
StatCnt: 1,
}},
},
},
},
},
{
name: "generated index",
setup: []string{
"create table t (pk int primary key, c0 int, c1 int as (c0) virtual, index idx(c1))",
"insert into t (pk, c0) values (0,0), (1,1), (2,2), (3,NULL), (4,NULL)",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "t", "idx"}, {"mydb", "t", "primary"}},
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 1,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 1,
}},
},
},
},
},
{
name: "keyless index",
setup: []string{
"create table t (c1 int, c2 int, index (c2))",
"insert into t values (0,0), (1,1), (2,2), (3,NULL), (4,NULL)",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "t", "c2"}},
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 1,
Backing: "mydb",
Active: true,
StorageBucketCnt: 1,
CachedBucketCnt: 1,
CachedBoundCnt: 1,
CachedTemplateCnt: 1,
StatCnt: 1,
}},
},
},
},
},
{
name: "caps testing",
setup: []string{
"create table XY (x int primary key, Y int, key Yx (Y,x))",
"alter table xy add index y2 (y)",
"insert into xy values (0,0), (1,0), (2,0)",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y2"}, {"mydb", "xy", "yx"}},
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(3)}},
},
{
query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(12)}},
},
{
query: "delete from xy where x > 500",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(6)}},
},
},
},
{
name: "database ddl",
setup: []string{
"create table mydb.xy (x int primary key, y int, key (y,x))",
"insert into xy values (0,0), (1,0), (2,0)",
"create database repo2",
"create table repo2.xy (x int primary key, y int, key (y,x))",
"insert into repo2.xy values (0,0), (1,0), (2,0)",
"create table repo2.ab (a int primary key, b int, key (b,a))",
"insert into repo2.ab values (0,0), (1,0), (2,0)",
},
assertions: []assertion{
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{
{"mydb", "xy", "primary"}, {"mydb", "xy", "y"},
},
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
{
query: "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name",
res: []sql.Row{
{"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
{"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
},
},
{
query: "use repo2",
},
{
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
res: []sql.Row{
{"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
{"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
},
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(4)}},
},
{
query: "insert into repo2.xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(10)}},
},
{
query: "drop database repo2",
},
{
query: "call dolt_stats_wait()",
},
{
query: "use mydb",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
},
},
{
name: "recreate table without index",
setup: []string{
"create table xy (x int primary key, y int, key (y,x))",
"insert into xy values (0,0), (1,0), (2,0)",
},
assertions: []assertion{
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(2)}},
},
{
query: "drop table xy",
},
{
query: "create table xy (x int primary key, y int)",
},
{
query: "call dolt_stats_wait()",
},
{
query: "select count(*) from dolt_statistics",
res: []sql.Row{{int64(0)}},
},
},
},
{
name: "stats info",
setup: []string{
"create table xy (x int primary key, y int, key (y,x))",
"insert into xy values (0,0), (1,0), (2,0)",
"call dolt_add('-A')",
"call dolt_commit('-m', 'create xy')",
"call dolt_checkout('-b', 'feat')",
"call dolt_checkout('main')",
},
assertions: []assertion{
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 2,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 2,
}},
},
},
{
query: "call dolt_checkout('feat')",
},
{
query: "drop table xy",
},
{
query: "call dolt_stats_wait()",
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 2,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 1,
},
}},
},
{
query: "call dolt_checkout('main')",
},
{
query: "call dolt_branch('-D', 'feat')",
},
{
query: "call dolt_stats_wait()",
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 1,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 1,
},
}},
},
},
},
{
name: "stats stop/start",
setup: []string{
"create table xy (x int primary key, y int, key (y,x))",
"insert into xy values (0,0), (1,0), (2,0)",
"call dolt_add('-A')",
"call dolt_commit('-m', 'create xy')",
"call dolt_checkout('-b', 'feat')",
"call dolt_checkout('main')",
},
assertions: []assertion{
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 2,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 2,
},
}},
},
{
query: "call dolt_stats_stop()",
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 2,
Backing: "mydb",
Active: false,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 2,
},
}},
},
{
query: "call dolt_stats_restart()",
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 2,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 2,
},
}},
},
},
},
{
name: "stats purge",
setup: []string{
"create table xy (x int primary key, y int, key (y,x))",
"insert into xy values (0,0), (1,0), (2,0)",
"call dolt_add('-A')",
"call dolt_commit('-m', 'create xy')",
"call dolt_checkout('-b', 'feat')",
"call dolt_checkout('main')",
"insert into xy values (3,0)",
"call dolt_checkout('feat')",
"insert into xy values (3,0)",
},
assertions: []assertion{
{
query: "call dolt_stats_purge()",
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 0,
Backing: "mydb",
Active: false,
StorageBucketCnt: 0,
CachedBucketCnt: 0,
CachedBoundCnt: 0,
CachedTemplateCnt: 0,
StatCnt: 0,
},
}},
},
{
query: "call dolt_stats_restart()",
},
{
query: "call dolt_stats_wait()",
},
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{
{dprocedures.StatsInfo{
DbCnt: 2,
Backing: "mydb",
Active: true,
StorageBucketCnt: 2,
CachedBucketCnt: 2,
CachedBoundCnt: 2,
CachedTemplateCnt: 2,
StatCnt: 2,
},
}},
},
},
},
{
name: "null bounds",
setup: []string{
"create table xy (x int primary key, y int, key (y))",
"insert into xy values (0,NULL), (1,0), (2,0)",
"CREATE table xyz (x bigint primary key, y varchar(500), z bigint, key(x, z));",
"insert into xyz values (0,0,NULL), (1,1,0), (2,2,0)",
},
assertions: []assertion{
{
query: "call dolt_stats_info('--short')",
res: []sql.Row{{dprocedures.StatsInfo{
DbCnt: 1,
Active: true,
StorageBucketCnt: 4,
CachedBucketCnt: 4,
CachedBoundCnt: 4,
CachedTemplateCnt: 4,
StatCnt: 2,
Backing: "mydb",
}}},
},
{
query: "select index_name, null_count from dolt_statistics",
res: []sql.Row{{"primary", uint64(0)}, {"y", uint64(1)}, {"primary", uint64(0)}, {"x", uint64(1)}},
},
},
},
}
for _, tt := range scripts {
t.Run(tt.name, func(t *testing.T) {
bthreads := sql.NewBackgroundThreads()
ctx, sqlEng, sc := emptySetup(t, bthreads, false, false)
defer sqlEng.Close()
require.NoError(t, sc.Restart())
//sc.Debug = true
for _, s := range tt.setup {
require.NoError(t, executeQuery(ctx, sqlEng, s))
}
require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_flush()"))
for i, a := range tt.assertions {
if sc.Debug {
log.Println(a.query)
}
rows, err := executeQueryResults(ctx, sqlEng, a.query)
if a.err != "" {
require.Equal(t, a.err, err.Error())
} else {
require.NoError(t, err)
}
if a.res != nil {
cmp, exp := normalize(rows, a.res)
require.Equal(t, exp, cmp, "query no "+strconv.Itoa(i)+" failed: "+a.query)
}
}
})
}
}
func normalize(cmp, exp []sql.Row) ([]sql.Row, []sql.Row) {
for i, r := range exp {
for j, v := range r {
if _, ok := v.(dprocedures.StatsInfo); ok {
if strSi, ok := cmp[i][j].(string); ok {
si := dprocedures.StatsInfo{}
if err := json.Unmarshal([]byte(strSi), &si); err != nil {
log.Fatal(err)
}
si.GenCnt = 0
cmp[i][j] = si
}
}
}
}
return cmp, exp
}
@@ -0,0 +1,550 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"encoding/binary"
"errors"
"fmt"
"strconv"
"strings"
"sync"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)
var ErrIncompatibleVersion = errors.New("client stats version mismatch")
type StatsKv interface {
PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error
GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error)
GetTemplate(key templateCacheKey) (stats.Statistic, bool)
PutTemplate(key templateCacheKey, stat stats.Statistic)
GetBound(h hash.Hash, len int) (sql.Row, bool)
PutBound(h hash.Hash, r sql.Row, l int)
Flush(ctx context.Context) (int, error)
Len() int
GcGen() uint64
}
var _ StatsKv = (*prollyStats)(nil)
var _ StatsKv = (*memStats)(nil)
var _ StatsKv = (*StatsController)(nil)
func NewMemStats() *memStats {
return &memStats{
mu: sync.Mutex{},
buckets: make(map[bucketKey]*stats.Bucket),
templates: make(map[templateCacheKey]stats.Statistic),
bounds: make(map[bucketKey]sql.Row),
gcFlusher: make(map[*val.TupleBuilder][]bucketKey),
}
}
type memStats struct {
mu sync.Mutex
gcGen uint64
buckets map[bucketKey]*stats.Bucket
templates map[templateCacheKey]stats.Statistic
bounds map[bucketKey]sql.Row
// gcFlusher tracks state require to lazily swap from
// a *memStats to *prollyStats
gcFlusher map[*val.TupleBuilder][]bucketKey
}
func (m *memStats) StorageCnt(context.Context) (int, error) {
return 0, nil
}
func (m *memStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
m.mu.Lock()
defer m.mu.Unlock()
t, ok := m.templates[key]
if !ok {
return stats.Statistic{}, false
}
return t, true
}
func (m *memStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
m.mu.Lock()
defer m.mu.Unlock()
m.templates[key] = stat
}
type bucketKey [22]byte
func getBucketKey(h hash.Hash, l int) bucketKey {
var k bucketKey
copy(k[:hash.ByteLen], h[:])
binary.BigEndian.PutUint16(k[hash.ByteLen:], uint16(l))
return k
}
func (m *memStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
m.mu.Lock()
defer m.mu.Unlock()
k := getBucketKey(h, l)
r, ok := m.bounds[k]
if !ok {
return nil, false
}
return r, true
}
func (m *memStats) PutBound(h hash.Hash, r sql.Row, l int) {
m.mu.Lock()
defer m.mu.Unlock()
k := getBucketKey(h, l)
m.bounds[k] = r
}
func (m *memStats) GcMark(from StatsKv, nodes []tree.Node, buckets []*stats.Bucket, idxLen int, tb *val.TupleBuilder) bool {
if from.GcGen() > m.GcGen() {
return false
}
m.mu.Lock()
defer m.mu.Unlock()
for i, b := range buckets {
h := nodes[i].HashOf()
k := getBucketKey(h, idxLen)
if i == 0 {
m.bounds[k], _ = from.GetBound(h, idxLen)
}
m.buckets[k] = b
m.gcFlusher[tb] = append(m.gcFlusher[tb], k)
}
return true
}
func (m *memStats) GcGen() uint64 {
m.mu.Lock()
defer m.mu.Unlock()
return m.gcGen
}
func (m *memStats) Len() int {
m.mu.Lock()
defer m.mu.Unlock()
return len(m.buckets)
}
func (m *memStats) PutBucket(_ context.Context, h hash.Hash, b *stats.Bucket, _ *val.TupleBuilder) error {
m.mu.Lock()
defer m.mu.Unlock()
k := getBucketKey(h, len(b.BoundVal))
m.buckets[k] = b
return nil
}
func (m *memStats) GetBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
m.mu.Lock()
defer m.mu.Unlock()
if h.IsEmpty() {
return nil, false, nil
}
k := getBucketKey(h, tupB.Desc.Count())
b, ok := m.buckets[k]
return b, ok, nil
}
func (m *memStats) Flush(_ context.Context) (int, error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.gcFlusher != nil {
m.gcFlusher = nil
}
return 0, nil
}
func NewProllyStats(ctx context.Context, destDb dsess.SqlDatabase) (*prollyStats, error) {
sch := schema.StatsTableDoltSchema
kd, vd := sch.GetMapDescriptors(nil)
keyBuilder := val.NewTupleBuilder(kd)
valueBuilder := val.NewTupleBuilder(vd)
newMap, err := prolly.NewMapFromTuples(ctx, destDb.DbData().Ddb.NodeStore(), kd, vd)
if err != nil {
return nil, err
}
return &prollyStats{
mu: sync.Mutex{},
destDb: destDb,
kb: keyBuilder,
vb: valueBuilder,
m: newMap.Mutate(),
mem: NewMemStats(),
}, nil
}
type prollyStats struct {
mu sync.Mutex
destDb dsess.SqlDatabase
kb, vb *val.TupleBuilder
m *prolly.MutableMap
newM *prolly.MutableMap
mem *memStats
}
func (p *prollyStats) Len() int {
return p.mem.Len()
}
func (p *prollyStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
return p.mem.GetTemplate(key)
}
func (p *prollyStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
p.mem.PutTemplate(key, stat)
}
func (p *prollyStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
return p.mem.GetBound(h, l)
}
func (p *prollyStats) PutBound(h hash.Hash, r sql.Row, l int) {
p.mem.PutBound(h, r, l)
}
func (p *prollyStats) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error {
if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil {
return err
}
k, err := p.encodeHash(h, tupB.Desc.Count())
if err != nil {
return err
}
v, err := p.encodeBucket(ctx, b, tupB)
if err != nil {
return err
}
p.mu.Lock()
defer p.mu.Unlock()
return p.m.Put(ctx, k, v)
}
func (p *prollyStats) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
if h.IsEmpty() {
return nil, false, nil
}
b, ok, err := p.mem.GetBucket(ctx, h, tupB)
if err != nil {
return nil, false, err
}
if ok {
return b, true, nil
}
// missing bucket and not GC'ing, try disk
k, err := p.encodeHash(h, tupB.Desc.Count())
if err != nil {
return nil, false, err
}
var v val.Tuple
err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error {
if key != nil {
ok = true
v = value
}
return nil
})
if !ok || err != nil {
return nil, false, err
}
b, err = p.decodeBucketTuple(ctx, v, tupB)
if err != nil {
return nil, false, err
}
p.mem.PutBucket(ctx, h, b, tupB)
return b, true, nil
}
func (p *prollyStats) GcGen() uint64 {
return p.mem.gcGen
}
func (p *prollyStats) LoadFromMem(ctx context.Context) error {
p.mem.mu.Lock()
defer p.mem.mu.Unlock()
for tb, keys := range p.mem.gcFlusher {
for _, key := range keys {
b, ok := p.mem.buckets[key]
if !ok {
return fmt.Errorf("memory KV inconsistent, missing bucket for: %s", key)
}
tupK, err := p.encodeHash(hash.New(key[:hash.ByteLen]), tb.Desc.Count())
tupV, err := p.encodeBucket(ctx, b, tb)
if err != nil {
return err
}
if err := p.m.Put(ctx, tupK, tupV); err != nil {
return err
}
}
}
p.mem.gcFlusher = nil
return nil
}
func (p *prollyStats) Flush(ctx context.Context) (int, error) {
if err := p.LoadFromMem(ctx); err != nil {
return 0, err
}
p.mu.Lock()
defer p.mu.Unlock()
flushedMap, err := p.m.Map(ctx)
if err != nil {
return 0, err
}
if err := p.destDb.DbData().Ddb.SetStatistics(ctx, "main", flushedMap.HashOf()); err != nil {
return 0, err
}
p.m = flushedMap.Mutate()
cnt, err := flushedMap.Count()
return cnt, err
}
func (p *prollyStats) encodeHash(h hash.Hash, len int) (val.Tuple, error) {
p.mu.Lock()
defer p.mu.Unlock()
p.kb.PutInt64(0, int64(len))
if err := p.kb.PutString(1, h.String()); err != nil {
return nil, err
}
return p.kb.Build(p.m.NodeStore().Pool()), nil
}
func (p *prollyStats) decodeHashTuple(v val.Tuple) (int, hash.Hash, error) {
l, ok := p.kb.Desc.GetInt64(0, v)
hStr, ok := p.kb.Desc.GetString(1, v)
if !ok {
return 0, hash.Hash{}, fmt.Errorf("unexpected null hash")
}
return int(l), hash.Parse(hStr), nil
}
func (p *prollyStats) decodeBucketTuple(ctx context.Context, v val.Tuple, tupB *val.TupleBuilder) (*stats.Bucket, error) {
var row []interface{}
for i := 0; i < p.vb.Desc.Count(); i++ {
f, err := tree.GetField(ctx, p.vb.Desc, i, v, p.m.NodeStore())
if err != nil {
return nil, err
}
row = append(row, f)
}
version := row[0]
if version != schema.StatsVersion {
return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
}
rowCount := row[1].(int64)
distinctCount := row[2].(int64)
nullCount := row[3].(int64)
boundRowStr := row[4].(string)
upperBoundCnt := row[5].(int64)
mcvCountsStr := row[10].(string)
boundRow, err := DecodeRow(ctx, p.m.NodeStore(), boundRowStr, tupB)
if err != nil {
return nil, err
}
var mcvCnts []uint64
if len(mcvCountsStr) > 0 {
for _, c := range strings.Split(mcvCountsStr, ",") {
cnt, err := strconv.ParseInt(c, 10, 64)
if err != nil {
return nil, err
}
mcvCnts = append(mcvCnts, uint64(cnt))
}
}
mcvs := make([]sql.Row, len(mcvCnts))
for i, v := range row[6 : 6+len(mcvCnts)] {
if v != nil && v != "" {
row, err := DecodeRow(ctx, p.m.NodeStore(), v.(string), tupB)
if err != nil {
return nil, err
}
mcvs[i] = row
}
}
return &stats.Bucket{
RowCnt: uint64(rowCount),
DistinctCnt: uint64(distinctCount),
NullCnt: uint64(nullCount),
McvsCnt: mcvCnts,
BoundCnt: uint64(upperBoundCnt),
BoundVal: boundRow,
McvVals: mcvs,
}, nil
}
var mcvTypes = []sql.Type{types.Int16, types.Int16, types.Int16, types.Int16}
func (p *prollyStats) encodeBucket(ctx context.Context, b *stats.Bucket, tupB *val.TupleBuilder) (val.Tuple, error) {
p.mu.Lock()
defer p.mu.Unlock()
p.vb.PutInt64(0, schema.StatsVersion)
p.vb.PutInt64(1, int64(b.RowCount()))
p.vb.PutInt64(2, int64(b.DistinctCount()))
p.vb.PutInt64(3, int64(b.NullCount()))
boundRow, err := EncodeRow(ctx, p.m.NodeStore(), b.UpperBound(), tupB)
if err != nil {
return nil, err
}
p.vb.PutString(4, string(boundRow))
p.vb.PutInt64(5, int64(b.BoundCount()))
for i, r := range b.Mcvs() {
mcvRow, err := EncodeRow(ctx, p.m.NodeStore(), r, tupB)
if err != nil {
return nil, err
}
p.vb.PutString(6+i, string(mcvRow))
}
var mcvCntsRow sql.Row
for _, v := range b.McvCounts() {
mcvCntsRow = append(mcvCntsRow, int(v))
}
p.vb.PutString(10, stats.StringifyKey(mcvCntsRow, mcvTypes[:len(mcvCntsRow)]))
return p.vb.Build(p.m.NodeStore().Pool()), nil
}
func (p *prollyStats) NewEmpty(ctx context.Context) (StatsKv, error) {
kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors(nil)
newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd)
if err != nil {
return nil, err
}
m := newMap.Mutate()
return &prollyStats{m: m, destDb: p.destDb, kb: p.kb, vb: p.vb}, nil
}
func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
for i := range tb.Desc.Count() {
v := r[i]
if v == nil {
continue
}
if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
return nil, err
}
}
return tb.Build(ns.Pool()), nil
}
func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
tup := []byte(s)
r := make(sql.Row, tb.Desc.Count())
var err error
for i, _ := range r {
r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
if err != nil {
return nil, err
}
}
return r, nil
}
func (sc *StatsController) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.kv.PutBucket(ctx, h, b, tupB)
}
func (sc *StatsController) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.kv.GetBucket(ctx, h, tupB)
}
func (sc *StatsController) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.kv.GetTemplate(key)
}
func (sc *StatsController) PutTemplate(key templateCacheKey, stat stats.Statistic) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.kv.PutTemplate(key, stat)
}
func (sc *StatsController) GetBound(h hash.Hash, len int) (sql.Row, bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.kv.GetBound(h, len)
}
func (sc *StatsController) PutBound(h hash.Hash, r sql.Row, l int) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.kv.PutBound(h, r, l)
}
func (sc *StatsController) Flush(ctx context.Context) (int, error) {
sqlCtx, err := sc.ctxGen(ctx)
if err != nil {
return 0, err
}
defer sql.SessionEnd(sqlCtx.Session)
sql.SessionCommandBegin(sqlCtx.Session)
defer sql.SessionCommandEnd(sqlCtx.Session)
sc.mu.Lock()
defer sc.mu.Unlock()
defer sc.signalListener(leFlush)
return sc.kv.Flush(sqlCtx)
}
func (sc *StatsController) Len() int {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.kv.Len()
}
func (sc *StatsController) GcGen() uint64 {
sc.mu.Lock()
defer sc.mu.Unlock()
return sc.kv.GcGen()
}
@@ -0,0 +1,200 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"strings"
"testing"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly/message"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/types"
"github.com/dolthub/dolt/go/store/val"
)
func TestProllyKv(t *testing.T) {
threads := sql.NewBackgroundThreads()
prollyKv := newTestProllyKv(t, threads)
h := hash.Parse(strings.Repeat("a", hash.StringLen))
h2 := hash.Parse(strings.Repeat("b", hash.StringLen))
k := getBucketKey(h, 2)
tupB := val.NewTupleBuilder(val.NewTupleDescriptor(
val.Type{Enc: val.Int64Enc, Nullable: true},
val.Type{Enc: val.StringEnc, Nullable: true},
))
t.Run("TestBoundsRoundTrip", func(t *testing.T) {
exp := sql.Row{1, 1}
prollyKv.PutBound(h, exp, 2)
cmp, ok := prollyKv.GetBound(h, 2)
require.True(t, ok)
require.Equal(t, exp, cmp)
_, ok = prollyKv.GetBound(h2, 2)
require.False(t, ok)
})
t.Run("TestTemplatesRoundTrip", func(t *testing.T) {
exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}}
key := templateCacheKey{
h: h,
idxName: "PRIMARY",
}
prollyKv.PutTemplate(key, exp)
cmp, ok := prollyKv.GetTemplate(key)
require.True(t, ok)
require.Equal(t, exp, cmp)
key2 := templateCacheKey{
h: h2,
idxName: "PRIMARY",
}
_, ok = prollyKv.GetTemplate(key2)
require.False(t, ok)
})
t.Run("TestBucketsRoundTrip", func(t *testing.T) {
exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
require.NoError(t, err)
cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
require.NoError(t, err)
require.True(t, ok)
require.Equal(t, exp, cmp)
// delete from memory, should pull from disk when |tupB| supplied
delete(prollyKv.mem.buckets, k)
cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB)
require.NoError(t, err)
require.True(t, ok)
require.Equal(t, exp.RowCnt, cmp.RowCnt)
require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt)
require.Equal(t, exp.NullCnt, cmp.NullCnt)
require.Equal(t, exp.McvsCnt, cmp.McvsCnt)
require.Equal(t, exp.McvVals[0], cmp.McvVals[0])
require.Equal(t, exp.McvVals[1], cmp.McvVals[1])
require.Equal(t, exp.McvVals[2], cmp.McvVals[2])
require.Equal(t, exp.McvVals[3], cmp.McvVals[3])
require.Equal(t, exp.BoundVal, cmp.BoundVal)
require.Equal(t, exp.BoundCnt, cmp.BoundCnt)
})
t.Run("TestNilMcvsRoundTrip", func(t *testing.T) {
exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}}).(*stats.Bucket)
err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
delete(prollyKv.mem.buckets, k)
cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
require.NoError(t, err)
require.True(t, ok)
require.Equal(t, exp.RowCnt, cmp.RowCnt)
require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt)
require.Equal(t, exp.NullCnt, cmp.NullCnt)
require.Equal(t, exp.McvsCnt, cmp.McvsCnt)
require.Equal(t, len(exp.McvVals), len(cmp.McvVals))
require.Equal(t, exp.McvVals[0], cmp.McvVals[0])
require.Equal(t, exp.McvVals[1], cmp.McvVals[1])
require.Equal(t, exp.BoundVal, cmp.BoundVal)
require.Equal(t, exp.BoundCnt, cmp.BoundCnt)
})
t.Run("TestGcGenBlocking", func(t *testing.T) {
to := NewMemStats()
from := NewMemStats()
from.gcGen = 1
require.False(t, to.GcMark(from, nil, nil, 0, nil))
})
t.Run("TestGcMarkFlush", func(t *testing.T) {
ctx := context.Background()
bthreads := sql.NewBackgroundThreads()
defer bthreads.Shutdown()
prev := NewMemStats()
nodes1, bucks1 := testNodes(t, 10, 1)
nodes2, bucks2 := testNodes(t, 10, 2)
nodes3, bucks3 := testNodes(t, 10, 3)
for i := range nodes1 {
require.NoError(t, prev.PutBucket(ctx, nodes1[i].HashOf(), bucks1[i], tupB))
}
for i := range nodes2 {
require.NoError(t, prev.PutBucket(ctx, nodes2[i].HashOf(), bucks2[i], tupB))
}
for i := range nodes3 {
require.NoError(t, prev.PutBucket(ctx, nodes3[i].HashOf(), bucks3[i], tupB))
}
require.Equal(t, 30, prev.Len())
to := NewMemStats()
require.True(t, to.GcMark(prev, nodes1, bucks1, 2, tupB))
require.True(t, to.GcMark(prev, nodes2, bucks2, 2, tupB))
require.Equal(t, 1, len(to.gcFlusher))
require.Equal(t, 20, len(to.gcFlusher[tupB]))
require.Equal(t, 20, to.Len())
kv := newTestProllyKv(t, bthreads)
kv.mem = to
cnt, err := kv.Flush(ctx)
require.NoError(t, err)
require.Equal(t, 20, cnt)
})
}
func newTestProllyKv(t *testing.T, threads *sql.BackgroundThreads) *prollyStats {
dEnv := dtestutils.CreateTestEnv()
sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
ctx.Session.SetClient(sql.Client{
User: "billy boy",
Address: "bigbillie@fake.horse",
})
require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb"))
require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
startDbs := sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx)
kv, err := NewProllyStats(ctx, startDbs[0].(dsess.SqlDatabase))
require.NoError(t, err)
return kv
}
func testNodes(t *testing.T, cnt int, seed uint8) ([]tree.Node, []*stats.Bucket) {
ts := &chunks.TestStorage{}
ns := tree.NewNodeStore(ts.NewViewWithFormat(types.Format_DOLT.VersionString()))
s := message.NewBlobSerializer(ns.Pool())
var nodes []tree.Node
var buckets []*stats.Bucket
for i := range cnt {
vals := [][]byte{{uint8(i), seed, 1, 1}}
msg := s.Serialize([][]byte{{0}}, vals, []uint64{1}, 0)
node, _, err := tree.NodeFromBytes(msg)
require.NoError(t, err)
nodes = append(nodes, node)
buckets = append(buckets, &stats.Bucket{RowCnt: uint64(i), BoundVal: sql.Row{i, "col2"}})
}
return nodes, buckets
}
@@ -1,535 +0,0 @@
// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"errors"
"fmt"
"path/filepath"
"strings"
"sync"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly/tree"
)
var ErrFailedToLoad = errors.New("failed to load statistics")
type indexMeta struct {
qual sql.StatQualifier
cols []string
newNodes []tree.Node
// updateOrdinals are [start, stop] tuples for each update chunk
updateOrdinals []updateOrdinal
keepChunks []sql.HistogramBucket
dropChunks []sql.HistogramBucket
allAddrs []hash.Hash
}
type updateOrdinal struct {
start, stop uint64
}
func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider {
return &Provider{
pro: pro,
sf: sf,
mu: &sync.Mutex{},
statDbs: make(map[string]Database),
autoCtxCancelers: make(map[string]context.CancelFunc),
analyzeCtxCancelers: make(map[string]context.CancelFunc),
status: make(map[string]string),
lockedTables: make(map[string]bool),
}
}
// Provider is the engine interface for reading and writing index statistics.
// Each database has its own statistics table that all tables/indexes in a db
// share.
type Provider struct {
mu *sync.Mutex
pro *sqle.DoltDatabaseProvider
sf StatsFactory
statDbs map[string]Database
autoCtxCancelers map[string]context.CancelFunc
analyzeCtxCancelers map[string]context.CancelFunc
starter sqle.InitDatabaseHook
status map[string]string
lockedTables map[string]bool
}
// each database has one statistics table that is a collection of the
// table stats in the database
type dbToStats struct {
mu *sync.Mutex
dbName string
stats map[sql.StatQualifier]*DoltStats
statsDatabase Database
latestTableHashes map[string]hash.Hash
}
func newDbStats(dbName string) *dbToStats {
return &dbToStats{
mu: &sync.Mutex{},
dbName: dbName,
stats: make(map[sql.StatQualifier]*DoltStats),
latestTableHashes: make(map[string]hash.Hash),
}
}
var _ sql.StatsProvider = (*Provider)(nil)
func (p *Provider) Close() error {
var lastErr error
for _, db := range p.statDbs {
if err := db.Close(); err != nil {
lastErr = err
}
}
return lastErr
}
func (p *Provider) TryLockForUpdate(branch, db, table string) bool {
p.mu.Lock()
defer p.mu.Unlock()
lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
if ok := p.lockedTables[lockId]; ok {
return false
}
p.lockedTables[lockId] = true
return true
}
func (p *Provider) UnlockTable(branch, db, table string) {
p.mu.Lock()
defer p.mu.Unlock()
lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
p.lockedTables[lockId] = false
return
}
func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error {
err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db)
if err != nil {
p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error()))
return err
}
p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name))
return nil
}
func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) {
p.starter = hook
}
func (p *Provider) CancelRefreshThread(dbName string) {
p.mu.Lock()
if cancel, ok := p.autoCtxCancelers[dbName]; ok {
cancel()
}
p.mu.Unlock()
p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName))
}
func (p *Provider) ThreadStatus(dbName string) string {
p.mu.Lock()
defer p.mu.Unlock()
if msg, ok := p.status[dbName]; ok {
return msg
}
return "no active stats thread"
}
func (p *Provider) TrackedBranches(dbName string) []string {
db, ok := p.getStatDb(dbName)
if !ok {
return nil
}
return db.Branches()
}
func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return nil, nil
}
var schemaName string
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
}
return p.GetTableDoltStats(ctx, branch, db, schemaName, table.Name())
}
func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) {
statDb, ok := p.getStatDb(db)
if !ok || statDb == nil {
return nil, nil
}
if branch == "" {
dSess := dsess.DSessFromSess(ctx.Session)
var err error
branch, err = dSess.GetBranch()
if err != nil {
return nil, nil
}
}
var ret []sql.Statistic
for _, qual := range statDb.ListStatQuals(branch) {
if strings.EqualFold(db, qual.Database) && strings.EqualFold(schema, qual.Sch) && strings.EqualFold(table, qual.Tab) {
stat, _ := statDb.GetStat(branch, qual)
ret = append(ret, stat)
}
}
return ret, nil
}
func (p *Provider) setStatDb(name string, db Database) {
p.mu.Lock()
defer p.mu.Unlock()
p.statDbs[name] = db
}
func (p *Provider) getStatDb(name string) (Database, bool) {
p.mu.Lock()
defer p.mu.Unlock()
statDb, ok := p.statDbs[strings.ToLower(name)]
return statDb, ok
}
func (p *Provider) deleteStatDb(name string) {
p.mu.Lock()
defer p.mu.Unlock()
delete(p.statDbs, strings.ToLower(name))
}
func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error {
statDb, ok := p.getStatDb(s.Qualifier().Db())
if !ok {
return nil
}
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return nil
}
doltStat, err := DoltStatsFromSql(s)
if err != nil {
return err
}
p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db()))
return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat)
}
func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) {
statDb, ok := p.getStatDb(qual.Db())
if !ok {
return nil, false
}
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return nil, false
}
return statDb.GetStat(branch, qual)
}
func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) {
stat, ok := p.getQualStats(ctx, qual)
if !ok {
return nil, false
}
return stat, true
}
func (p *Provider) DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error {
statDb, ok := p.getStatDb(db)
if !ok {
return nil
}
p.mu.Lock()
defer p.mu.Unlock()
p.status[db] = "dropped"
return statDb.DeleteBranchStats(ctx, branch, flush)
}
func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error {
statDb, ok := p.getStatDb(db)
if !ok {
return nil
}
for _, branch := range statDb.Branches() {
// remove provider access
p.DropBranchDbStats(ctx, branch, db, flush)
}
if flush {
p.deleteStatDb(db)
}
return nil
}
func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error {
statDb, ok := p.getStatDb(qual.Db())
if !ok {
return nil
}
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return nil
}
if _, ok := statDb.GetStat(branch, qual); ok {
statDb.DeleteStats(ctx, branch, qual)
p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String()))
}
return nil
}
func (p *Provider) UpdateStatus(db string, msg string) {
p.mu.Lock()
defer p.mu.Unlock()
p.status[db] = msg
}
func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
statDb, ok := p.getStatDb(db)
if !ok {
return 0, sql.ErrDatabaseNotFound.New(db)
}
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return 0, err
}
var schemaName string
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
}
priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
if !ok {
return 0, nil
}
return priStats.RowCount(), nil
}
func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
statDb, ok := p.getStatDb(db)
if !ok {
return 0, sql.ErrDatabaseNotFound.New(db)
}
dSess := dsess.DSessFromSess(ctx.Session)
branch, err := dSess.GetBranch()
if err != nil {
return 0, err
}
var schemaName string
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
}
priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
if !ok {
return 0, nil
}
return priStats.AvgSize(), nil
}
func (p *Provider) Prune(ctx *sql.Context) error {
dSess := dsess.DSessFromSess(ctx.Session)
for _, sqlDb := range p.pro.DoltDatabases() {
dbName := strings.ToLower(sqlDb.Name())
sqlDb, ok, err := dSess.Provider().SessionDatabase(ctx, dbName)
if err != nil {
return err
}
if !ok {
continue
}
statDb, ok := p.getStatDb(dbName)
if !ok {
continue
}
// Canceling refresh thread prevents background thread from
// making progress. Prune should succeed.
p.CancelRefreshThread(dbName)
tables, err := sqlDb.GetTableNames(ctx)
if err != nil {
return err
}
for _, branch := range statDb.Branches() {
err := func() error {
// function closure ensures safe defers
var stats []sql.Statistic
for _, t := range tables {
// XXX: avoid races with ANALYZE with the table locks.
// Either concurrent purge or analyze (or both) will fail.
if !p.TryLockForUpdate(branch, dbName, t) {
p.mu.Lock()
fmt.Println(p.lockedTables)
p.mu.Unlock()
return fmt.Errorf("concurrent statistics update and prune; retry prune when update is finished")
}
defer p.UnlockTable(branch, dbName, t)
tableStats, err := p.GetTableDoltStats(ctx, branch, dbName, sqlDb.SchemaName(), t)
if err != nil {
return err
}
stats = append(stats, tableStats...)
}
if err := p.DropBranchDbStats(ctx, branch, dbName, true); err != nil {
return err
}
for _, s := range stats {
ds, ok := s.(*DoltStats)
if !ok {
return fmt.Errorf("unexpected statistics type found: %T", s)
}
if err := statDb.SetStat(ctx, branch, ds.Qualifier(), ds); err != nil {
return err
}
}
if err := statDb.Flush(ctx, branch); err != nil {
return err
}
return nil
}()
if err != nil {
return err
}
}
}
return nil
}
func (p *Provider) Purge(ctx *sql.Context) error {
for _, sqlDb := range p.pro.DoltDatabases() {
dbName := strings.ToLower(sqlDb.Name())
tables, err := sqlDb.GetTableNames(ctx)
if err != nil {
return err
}
var branches []string
db, ok := p.getStatDb(dbName)
if ok {
// Canceling refresh thread prevents background thread from
// making progress. Purge should succeed.
p.CancelRefreshThread(dbName)
branches = db.Branches()
for _, branch := range branches {
err := func() error {
for _, t := range tables {
// XXX: avoid races with ANALYZE with the table locks.
// Either concurrent purge or analyze (or both) will fail.
if !p.TryLockForUpdate(branch, dbName, t) {
return fmt.Errorf("concurrent statistics update and prune; retry purge when update is finished")
}
defer p.UnlockTable(branch, dbName, t)
}
err := p.DropBranchDbStats(ctx, branch, dbName, true)
if err != nil {
return fmt.Errorf("failed to drop stats: %w", err)
}
return nil
}()
if err != nil {
return err
}
}
}
// if the database's failed to load, we still want to delete the folder
fs, err := p.pro.FileSystemForDatabase(dbName)
if err != nil {
return err
}
//remove from filesystem
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
if err != nil {
return err
}
if ok, _ := statsFs.Exists(""); ok {
if err := statsFs.Delete("", true); err != nil {
return err
}
}
dropDbLoc, err := statsFs.Abs("")
if err != nil {
return err
}
if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
return err
}
if len(branches) == 0 {
// if stats db was invalid on startup, recreate from baseline
branches = p.getStatsBranches(ctx)
}
p.Load(ctx, fs, sqlDb, branches)
}
return nil
}
@@ -0,0 +1,639 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package statspro
import (
"context"
"errors"
"fmt"
"io"
"log"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)
const collectBatchSize = 20
func (sc *StatsController) CollectOnce(ctx context.Context) (string, error) {
genStart := sc.genCnt.Load()
newStats, err := sc.newStatsForRoot(ctx, nil)
if errors.Is(err, context.Canceled) {
return "", nil
} else if err != nil {
return "", err
}
if ok, err := sc.trySwapStats(ctx, genStart, newStats, nil); err != nil || !ok {
return "", err
}
return newStats.String(), nil
}
func (sc *StatsController) runWorker(ctx context.Context) (err error) {
var gcKv *memStats
var newStats *rootStats
gcTicker := time.NewTicker(sc.gcInterval)
for {
// This loops tries to update stats as long as context
// is active. Thread contexts governs who "owns" the update
// process. The generation counters ensure atomic swapping.
gcKv = nil
genStart := sc.genCnt.Load()
select {
case <-gcTicker.C:
sc.setDoGc(false)
default:
}
if sc.gcIsSet() {
gcKv = NewMemStats()
gcKv.gcGen = genStart
}
newStats, err = sc.newStatsForRoot(ctx, gcKv)
if errors.Is(err, context.Canceled) {
return nil
} else if err != nil {
sc.descError("", err)
}
if ok, err := sc.trySwapStats(ctx, genStart, newStats, gcKv); err != nil {
if !ok {
sc.descError("failed to swap stats", err)
} else {
sc.descError("swapped stats with flush failure", err)
}
}
select {
case <-ctx.Done():
// is double check necessary?
return context.Cause(ctx)
default:
}
}
}
func (sc *StatsController) trySwapStats(ctx context.Context, prevGen uint64, newStats *rootStats, gcKv *memStats) (ok bool, err error) {
if newStats == nil {
return false, fmt.Errorf("attempted to place a nil stats object")
}
sc.mu.Lock()
defer sc.mu.Unlock()
if ctx.Err() != nil {
// final ctx check in critical section, avoid races on
// stats after calling stop
return false, context.Cause(ctx)
}
signal := leSwap
defer func() {
if ok {
sc.logger.Debugf("stats successful swap: %s\n", newStats.String())
sc.signalListener(signal)
}
}()
if sc.genCnt.CompareAndSwap(prevGen, prevGen+1) {
// Replace stats and new Kv if no replacements happened
// in-between.
sc.Stats = newStats
if gcKv != nil {
signal |= leGc
// The new KV has all buckets for the latest root stats,
// background job will to swap the disk location and put
// entries into a prolly tree.
if prevGen != gcKv.GcGen() {
err = fmt.Errorf("gc gen didn't match update gen")
return
}
sc.doGc = false
sc.gcCnt++
sc.kv = gcKv
ok = true
if !sc.memOnly {
func() {
sc.mu.Unlock()
defer sc.mu.Lock()
if err := sc.sq.DoSync(ctx, func() error {
return sc.rotateStorage(ctx)
}); err != nil {
sc.descError("", err)
}
}()
}
}
// Flush new changes to disk, unlocked
if !sc.memOnly {
func() {
sc.mu.Unlock()
defer sc.mu.Lock()
if err := sc.sq.DoSync(ctx, func() error {
_, err := sc.Flush(ctx)
return err
}); err != nil {
sc.descError("", err)
}
}()
}
signal = signal | leFlush
return true, nil
}
return false, nil
}
func (sc *StatsController) newStatsForRoot(baseCtx context.Context, gcKv *memStats) (newStats *rootStats, err error) {
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("issuer panicked running work: %s", r)
}
if err != nil {
sc.descError("stats update interrupted", err)
}
}()
ctx, err := sc.ctxGen(baseCtx)
if err != nil {
return nil, err
}
defer sql.SessionEnd(ctx.Session)
dSess := dsess.DSessFromSess(ctx.Session)
var dbs []sql.Database
func() {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
dbs = dSess.Provider().AllDatabases(ctx)
}()
newStats = newRootStats()
for _, db := range dbs {
sqlDb, ok := db.(sqle.Database)
if !ok {
continue
}
var branches []ref.DoltRef
if err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
ddb, ok := dSess.GetDoltDB(ctx, db.Name())
if !ok {
return fmt.Errorf("get dolt db dolt database not found %s", db.Name())
}
var err error // races with outer err
branches, err = ddb.GetBranches(ctx)
return err
}); err != nil {
return nil, err
}
for _, br := range branches {
// this call avoids the chunkstore
sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), br.GetPath(), br.GetPath()+"/"+sqlDb.AliasedName())
if err != nil {
sc.descError("revisionForBranch", err)
continue
}
var schDbs []sql.DatabaseSchema
if err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
schDbs, err = sqlDb.AllSchemas(ctx)
return err
}); err != nil {
sc.descError("getDatabaseSchemas", err)
continue
}
for _, sqlDb := range schDbs {
switch sqlDb.SchemaName() {
case "dolt", "information_schema", "pg_catalog":
continue
}
var tableNames []string
if err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
tableNames, err = sqlDb.GetTableNames(ctx)
return err
}); err != nil {
sc.descError("getTableNames", err)
continue
}
newStats.DbCnt++
for _, tableName := range tableNames {
err := sc.updateTable(ctx, newStats, tableName, sqlDb.(dsess.SqlDatabase), gcKv)
if err != nil {
return nil, err
}
}
}
}
}
return newStats, nil
}
func (sc *StatsController) preexistingStats(k tableIndexesKey, h hash.Hash) ([]*stats.Statistic, bool) {
sc.mu.Lock()
defer sc.mu.Unlock()
if sc.Stats.hashes[k].Equal(h) {
return sc.Stats.stats[k], true
}
return nil, false
}
func (sc *StatsController) finalizeHistogram(template stats.Statistic, buckets []*stats.Bucket, firstBound sql.Row) *stats.Statistic {
template.LowerBnd = firstBound
for _, b := range buckets {
// accumulate counts
template.RowCnt += b.RowCnt
template.DistinctCnt += b.DistinctCnt
template.NullCnt += b.NullCnt
template.Hist = append(template.Hist, b)
}
return &template
}
func (sc *StatsController) collectIndexNodes(ctx *sql.Context, prollyMap prolly.Map, idxLen int, nodes []tree.Node) ([]*stats.Bucket, sql.Row, int, error) {
updater := newBucketBuilder(sql.StatQualifier{}, idxLen, prollyMap.KeyDesc())
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen))
firstNodeHash := nodes[0].HashOf()
lowerBound, ok := sc.kv.GetBound(firstNodeHash, idxLen)
if !ok {
sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
var err error
lowerBound, err = firstRowForIndex(ctx, idxLen, prollyMap, keyBuilder)
if err != nil {
return fmt.Errorf("get histogram bucket for node; %w", err)
}
if sc.Debug {
log.Printf("put bound: %s: %v\n", firstNodeHash.String()[:5], lowerBound)
}
sc.kv.PutBound(firstNodeHash, lowerBound, idxLen)
return nil
})
}
var writes int
var offset uint64
for i := 0; i < len(nodes); {
err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
newWrites := 0
for i < len(nodes) && newWrites < collectBatchSize {
n := nodes[i]
i++
treeCnt, err := n.TreeCount()
if err != nil {
return err
}
start, stop := offset, offset+uint64(treeCnt)
offset = stop
if _, ok, err := sc.GetBucket(ctx, n.HashOf(), keyBuilder); err != nil {
return err
} else if ok {
continue
}
writes++
newWrites++
updater.newBucket()
// we read exclusive range [node first key, next node first key)
iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
if err != nil {
return err
}
for {
// stats key will be a prefix of the index key
keyBytes, _, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err
}
// build full key
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
}
updater.add(ctx, keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
keyBuilder.Recycle()
}
// finalize the aggregation
newBucket, err := updater.finalize(ctx, prollyMap.NodeStore())
if err != nil {
return err
}
if err := sc.PutBucket(ctx, n.HashOf(), newBucket, keyBuilder); err != nil {
return err
}
}
return nil
})
if err != nil {
return nil, nil, 0, err
}
}
var buckets []*stats.Bucket
for _, n := range nodes {
newBucket, ok, err := sc.GetBucket(ctx, n.HashOf(), keyBuilder)
if err != nil || !ok {
sc.descError(fmt.Sprintf("missing histogram bucket for node %s", n.HashOf().String()[:5]), err)
return nil, nil, 0, err
}
buckets = append(buckets, newBucket)
}
return buckets, lowerBound, writes, nil
}
func (sc *StatsController) updateTable(ctx *sql.Context, newStats *rootStats, tableName string, sqlDb dsess.SqlDatabase, gcKv *memStats) error {
var err error
var sqlTable *sqle.DoltTable
var dTab *doltdb.Table
if err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
sqlTable, dTab, err = GetLatestTable(ctx, tableName, sqlDb)
return err
}); err != nil {
return err
}
schemaName := sqlTable.DatabaseSchema().SchemaName()
tableKey := tableIndexesKey{
db: strings.ToLower(sqlDb.AliasedName()),
branch: strings.ToLower(sqlDb.Revision()),
table: strings.ToLower(tableName),
schema: strings.ToLower(schemaName),
}
tableHash, err := dTab.HashOf()
if err != nil {
return err
}
if gcKv == nil {
if stats, ok := sc.preexistingStats(tableKey, tableHash); ok {
newStats.stats[tableKey] = stats
newStats.hashes[tableKey] = tableHash
newStats.TablesSkipped++
return nil
}
}
var indexes []sql.Index
if err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
indexes, err = sqlTable.GetIndexes(ctx)
return err
}); err != nil {
return err
}
var newTableStats []*stats.Statistic
for _, sqlIdx := range indexes {
if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() {
continue
}
var idx durable.Index
var err error
var prollyMap prolly.Map
func() {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
if strings.EqualFold(sqlIdx.ID(), "PRIMARY") {
idx, err = dTab.GetRowData(ctx)
} else {
idx, err = dTab.GetIndexRowData(ctx, sqlIdx.ID())
}
if err == nil {
prollyMap, err = durable.ProllyMapFromIndex(idx)
}
}()
if err != nil {
sc.descError("GetRowData", err)
continue
}
var template stats.Statistic
if err := sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
_, template, err = sc.getTemplate(ctx, sqlTable, sqlIdx)
if err != nil {
return fmt.Errorf("stats collection failed to generate a statistic template: %s.%s.%s:%T; %s", sqlDb.RevisionQualifiedName(), tableName, sqlIdx, sqlIdx, err.Error())
}
return nil
}); err != nil {
return err
} else if template.Fds.Empty() {
return fmt.Errorf("failed to creat template for %s/%s/%s/%s", sqlDb.Revision(), sqlDb.AliasedName(), tableName, sqlIdx.ID())
}
template.Qual.Database = sqlDb.AliasedName()
idxLen := len(sqlIdx.Expressions())
var levelNodes []tree.Node
if err = sc.sq.DoSync(ctx, func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
levelNodes, err = tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
if err != nil {
sc.descError("get level", err)
}
return err
}); err != nil {
return err
}
var buckets []*stats.Bucket
var firstBound sql.Row
if len(levelNodes) > 0 {
var writes int
buckets, firstBound, writes, err = sc.collectIndexNodes(ctx, prollyMap, idxLen, levelNodes)
if err != nil {
sc.descError("", err)
continue
}
newStats.BucketWrites += writes
}
newTableStats = append(newTableStats, sc.finalizeHistogram(template, buckets, firstBound))
if gcKv != nil {
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen))
if !gcKv.GcMark(sc.kv, levelNodes, buckets, idxLen, keyBuilder) {
return fmt.Errorf("GC interrupted updated")
}
if err := func() error {
sql.SessionCommandBegin(ctx.Session)
defer sql.SessionCommandEnd(ctx.Session)
schHash, _, err := sqlTable.IndexCacheKey(ctx)
if err != nil {
return err
}
key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
if t, ok := sc.GetTemplate(key); ok {
gcKv.PutTemplate(key, t)
}
return nil
}(); err != nil {
return err
}
}
}
newStats.stats[tableKey] = newTableStats
newStats.hashes[tableKey] = tableHash
newStats.TablesProcessed++
return nil
}
// GetLatestTable will get the WORKING root table for the current database/branch
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (*sqle.DoltTable, *doltdb.Table, error) {
var db sqle.Database
switch d := sqlDb.(type) {
case sqle.Database:
db = d
case sqle.ReadReplicaDatabase:
db = d.Database
default:
return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
}
sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
if err != nil {
return nil, nil, err
}
if !ok {
return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
}
var dTab *doltdb.Table
var sqleTable *sqle.DoltTable
switch t := sqlTable.(type) {
case *sqle.AlterableDoltTable:
sqleTable = t.DoltTable
dTab, err = t.DoltTable.DoltTable(ctx)
case *sqle.WritableDoltTable:
sqleTable = t.DoltTable
dTab, err = t.DoltTable.DoltTable(ctx)
case *sqle.DoltTable:
sqleTable = t
dTab, err = t.DoltTable(ctx)
default:
err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
}
if err != nil {
return nil, nil, err
}
return sqleTable, dTab, nil
}
type templateCacheKey struct {
h hash.Hash
idxName string
}
func (k templateCacheKey) String() string {
return k.idxName + "/" + k.h.String()[:5]
}
func (sc *StatsController) getTemplate(ctx *sql.Context, sqlTable *sqle.DoltTable, sqlIdx sql.Index) (templateCacheKey, stats.Statistic, error) {
schHash, _, err := sqlTable.IndexCacheKey(ctx)
if err != nil {
return templateCacheKey{}, stats.Statistic{}, err
}
key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
if template, ok := sc.GetTemplate(key); ok {
return key, template, nil
}
fds, colset, err := stats.IndexFds(strings.ToLower(sqlTable.Name()), sqlTable.Schema(), sqlIdx)
if err != nil {
return templateCacheKey{}, stats.Statistic{}, err
}
var class sql.IndexClass
switch {
case sqlIdx.IsSpatial():
class = sql.IndexClassSpatial
case sqlIdx.IsFullText():
class = sql.IndexClassFulltext
default:
class = sql.IndexClassDefault
}
var types []sql.Type
for _, cet := range sqlIdx.ColumnExpressionTypes() {
types = append(types, cet.Type)
}
// xxx: the lower here is load bearing, index comparison
// expects the expressions to be stripped of table name.
tablePrefix := strings.ToLower(sqlTable.Name()) + "."
cols := make([]string, len(sqlIdx.Expressions()))
for i, c := range sqlIdx.Expressions() {
cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
}
template := stats.Statistic{
Qual: sql.NewStatQualifier("", "", sqlTable.Name(), sqlIdx.ID()),
Cols: cols,
Typs: types,
IdxClass: uint8(class),
Fds: fds,
Colset: colset,
}
// We put template twice, once for schema changes with no data
// changes (here), and once when we put chunks to avoid GC dropping
// templates before the finalize job.
sc.PutTemplate(key, template)
return key, template, nil
}
File diff suppressed because it is too large Load Diff
+47 -32
View File
@@ -16,6 +16,7 @@ package sqle
import (
"math"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
@@ -219,18 +220,18 @@ var DoltSystemVariables = []sql.SystemVariable{
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsAutoRefreshEnabled,
Name: dsess.DoltStatsEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
Default: int8(0),
Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBootstrapEnabled,
Name: dsess.DoltStatsPaused,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
Default: int8(0),
Type: types.NewSystemBoolType(dsess.DoltStatsPaused),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsMemoryOnly,
@@ -240,18 +241,25 @@ var DoltSystemVariables = []sql.SystemVariable{
Default: int8(0),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsAutoRefreshThreshold,
Name: dsess.DoltStatsJobInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
Default: float64(.5),
Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
Default: int64(30 * time.Millisecond / time.Millisecond),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsAutoRefreshInterval,
Name: dsess.DoltStatsGCInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
Default: 600,
Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
Default: int64(time.Hour / time.Millisecond),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsGCEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsGCEnabled),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,
@@ -446,18 +454,39 @@ func AddDoltSystemVariables() {
Default: int8(0),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsAutoRefreshEnabled,
Name: dsess.DoltStatsEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
Default: int8(0),
Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBootstrapEnabled,
Name: dsess.DoltStatsPaused,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
Default: int8(0),
Type: types.NewSystemBoolType(dsess.DoltStatsPaused),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsGCInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
Default: int64(time.Hour / time.Millisecond),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsGCEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsGCEnabled),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsJobInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
Default: int64(30 * time.Millisecond / time.Millisecond),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsMemoryOnly,
@@ -466,20 +495,6 @@ func AddDoltSystemVariables() {
Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
Default: int8(0),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsAutoRefreshThreshold,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
Default: float64(.5),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsAutoRefreshInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
Default: 120,
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,
Dynamic: true,
+2 -2
View File
@@ -129,12 +129,12 @@ func (t *DoltTable) LookupForExpressions(ctx *sql.Context, exprs ...sql.Expressi
return sql.IndexLookup{}, nil, nil, false, nil
}
dbState, ok, err := sess.LookupDbState(ctx, t.db.Name())
dbState, ok, err := sess.LookupDbState(ctx, t.db.AliasedName())
if err != nil {
return sql.IndexLookup{}, nil, nil, false, nil
}
if !ok {
return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.Name())
return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.AliasedName())
}
var lookupCols []expression.LookupColumn
+4 -1
View File
@@ -517,7 +517,10 @@ func SqlRowsFromDurableIndex(idx durable.Index, sch schema.Schema) ([]sql.Row, e
ctx := context.Background()
var sqlRows []sql.Row
if types.Format_Default == types.Format_DOLT {
rowData := durable.ProllyMapFromIndex(idx)
rowData, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return nil, err
}
kd, vd := rowData.Descriptors()
iter, err := rowData.IterAll(ctx)
if err != nil {
@@ -141,6 +141,10 @@ func (db *UserSpaceDatabase) RequestedName() string {
return db.Name()
}
func (db *UserSpaceDatabase) AliasedName() string {
return db.Name()
}
func (db *UserSpaceDatabase) GetSchema(ctx *sql.Context, schemaName string) (sql.DatabaseSchema, bool, error) {
panic(fmt.Sprintf("GetSchema is not implemented for database %T", db))
}
@@ -36,7 +36,10 @@ func getPrimaryProllyWriter(ctx context.Context, t *doltdb.Table, schState *dses
return prollyIndexWriter{}, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return prollyIndexWriter{}, err
}
keyDesc, valDesc := m.Descriptors()
@@ -55,7 +58,10 @@ func getPrimaryKeylessProllyWriter(ctx context.Context, t *doltdb.Table, schStat
return prollyKeylessWriter{}, err
}
m := durable.ProllyMapFromIndex(idx)
m, err := durable.ProllyMapFromIndex(idx)
if err != nil {
return prollyKeylessWriter{}, err
}
keyDesc, valDesc := m.Descriptors()
@@ -116,7 +116,10 @@ func getSecondaryKeylessProllyWriters(ctx context.Context, t *doltdb.Table, schS
if err != nil {
return nil, err
}
m := durable.ProllyMapFromIndex(idxRows)
m, err := durable.ProllyMapFromIndex(idxRows)
if err != nil {
return nil, err
}
keyDesc, _ := m.Descriptors()
@@ -102,7 +102,10 @@ func BuildProllyIndexExternal(ctx *sql.Context, vrw types.ValueReadWriter, ns tr
defer it.Close()
empty, err := durable.NewEmptyIndexFromTableSchema(ctx, vrw, ns, idx, sch)
secondary := durable.ProllyMapFromIndex(empty)
secondary, err := durable.ProllyMapFromIndex(empty)
if err != nil {
return nil, err
}
tupIter := &tupleIterWithCb{iter: it, prefixDesc: prefixDesc, uniqCb: uniqCb}
ret, err := prolly.MutateMapWithTupleIter(ctx, secondary, tupIter)
@@ -150,7 +150,11 @@ func BuildSecondaryIndex(ctx *sql.Context, tbl *doltdb.Table, idx schema.Index,
if err != nil {
return nil, err
}
primary := durable.ProllyMapFromIndex(m)
primary, err := durable.ProllyMapFromIndex(m)
if err != nil {
return nil, err
}
return BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, idx, primary)
default:
@@ -218,7 +222,10 @@ func BuildUniqueProllyIndex(
if err != nil {
return nil, err
}
secondary := durable.ProllyMapFromIndex(empty)
secondary, err := durable.ProllyMapFromIndex(empty)
if err != nil {
return nil, err
}
iter, err := primary.IterAll(ctx)
if err != nil {
@@ -34,12 +34,20 @@ func (b *Buff[T]) Len() int {
return b.len
}
func (b *Buff[T]) Cap() int {
return cap(b.arr)
}
func (b *Buff[T]) At(i int) T {
return *b.at(i)
}
func (b *Buff[T]) at(i int) *T {
if i >= b.Len() {
panic("At on Buff too small")
}
j := (b.front + i) % len(b.arr)
return b.arr[j]
return &b.arr[j]
}
func (b *Buff[T]) Front() T {
@@ -50,6 +58,9 @@ func (b *Buff[T]) Pop() {
if b.Len() == 0 {
panic("Pop empty Buff")
}
// Don't leak entries...
var empty T
*b.at(0) = empty
b.front = (b.front + 1) % len(b.arr)
b.len -= 1
}
+54
View File
@@ -0,0 +1,54 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package valctx
import (
"context"
)
var enabled bool
// Globally enables context validation for the process. If this is not
// called, then the other functions in this package are noops.
func EnableContextValidation() {
enabled = true
}
type ctxKey int
var validationKey ctxKey
func WithContextValidation(ctx context.Context) context.Context {
if !enabled {
return ctx
}
return context.WithValue(ctx, validationKey, new(Validation))
}
type Validation func()
func SetContextValidation(ctx context.Context, validation Validation) {
if !enabled {
return
}
*ctx.Value(validationKey).(*Validation) = validation
}
func ValidateContext(ctx context.Context) {
if !enabled {
return
}
(*ctx.Value(validationKey).(*Validation))()
}
@@ -21,9 +21,6 @@ import (
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/jmoiron/sqlx"
"github.com/google/uuid"
)
@@ -149,10 +146,6 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) {
return nil, err
}
if err := t.collectStats(ctx); err != nil {
return nil, err
}
fmt.Println("Running test", t.test.GetName())
rs, err := t.run(ctx)
@@ -162,76 +155,3 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) {
return rs, nil
}
func (t *sysbenchTesterImpl) collectStats(ctx context.Context) error {
if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") {
db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/test", t.serverConfig.GetHost(), t.serverConfig.GetPort()))
if err != nil {
return err
}
return collectStats(ctx, db)
}
return nil
}
func collectStats(ctx context.Context, db *sqlx.DB) error {
c, err := db.Connx(ctx)
if err != nil {
return err
}
{
// configuration, restart, and check needs to be in the same session
tx, err := c.BeginTxx(ctx, nil)
if err != nil {
return err
}
if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_enabled = 1;"); err != nil {
return err
}
if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_interval = 0;"); err != nil {
return err
}
if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"); err != nil {
return err
}
if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"); err != nil {
return err
}
if _, err := tx.Exec("call dolt_stats_restart();"); err != nil {
return err
}
rows := map[string]interface{}{"cnt": 0}
tick := time.NewTicker(5 * time.Second)
for {
if rows["cnt"] != 0 {
fmt.Printf("collected %d histogram buckets\n", rows["cnt"])
break
}
select {
case <-tick.C:
res, err := tx.Queryx("select count(*) as cnt from dolt_statistics;")
if err != nil {
return err
}
if !res.Next() {
return fmt.Errorf("failed to set statistics")
}
if err := res.MapScan(rows); err != nil {
return err
}
if err := res.Close(); err != nil {
return err
}
}
}
}
if _, err := c.QueryContext(ctx, "call dolt_stats_stop();"); err != nil {
return err
}
return nil
}
@@ -20,9 +20,6 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/jmoiron/sqlx"
)
type tpccTesterImpl struct {
@@ -54,17 +51,6 @@ func (t *tpccTesterImpl) outputToResult(output []byte) (*Result, error) {
return OutputToResult(output, t.serverConfig.GetServerType(), t.serverConfig.GetVersion(), t.test.GetName(), t.test.GetId(), t.suiteId, t.config.GetRuntimeOs(), t.config.GetRuntimeGoArch(), t.serverParams, t.test.GetParamsToSlice(), nil, false)
}
func (t *tpccTesterImpl) collectStats(ctx context.Context) error {
if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") {
db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/sbt", t.serverConfig.GetHost(), t.serverConfig.GetPort()))
if err != nil {
return err
}
return collectStats(ctx, db)
}
return nil
}
func (t *tpccTesterImpl) prepare(ctx context.Context) error {
args := t.test.GetPrepareArgs(t.serverConfig)
cmd := exec.CommandContext(ctx, t.tpccCommand, args...)
@@ -119,10 +105,6 @@ func (t *tpccTesterImpl) Test(ctx context.Context) (*Result, error) {
return nil, err
}
if err := t.collectStats(ctx); err != nil {
return nil, err
}
fmt.Println("Running test", t.test.GetName())
rs, err := t.run(ctx)
+1 -1
View File
@@ -132,7 +132,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer](
prev := newKey
newKey, newValue = edits.NextMutation(ctx)
if newKey != nil {
assertTrue(order.Compare(ctx, K(newKey), K(prev)) > 0, "expected sorted edits")
assertTrue(order.Compare(ctx, K(newKey), K(prev)) > 0, "expected sorted edits: %v, %v", prev, newKey)
}
}
+2 -2
View File
@@ -629,8 +629,8 @@ func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error)
return ns.Read(ctx, ref)
}
func assertTrue(b bool, msg string) {
func assertTrue(b bool, msg string, args ...any) {
if !b {
panic("assertion failed: " + msg)
panic(fmt.Sprintf("assertion failed: "+msg, args...))
}
}
+5
View File
@@ -141,6 +141,11 @@ func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m Static
// GetHistogramLevel returns the highest internal level of the tree that has
// more than |low| addresses.
func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) {
if cnt, err := m.Count(); err != nil {
return nil, err
} else if cnt == 0 {
return nil, nil
}
currentLevel := []Node{m.Root}
level := m.Root.Level()
for len(currentLevel) < low && level > 0 {
+2 -1
View File
@@ -15,6 +15,7 @@
package val
import (
"strconv"
"time"
"github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors"
@@ -77,7 +78,7 @@ func NewTupleBuilder(desc TupleDesc) *TupleBuilder {
func (tb *TupleBuilder) Build(pool pool.BuffPool) (tup Tuple) {
for i, typ := range tb.Desc.Types {
if !typ.Nullable && tb.fields[i] == nil {
panic("cannot write NULL to non-NULL field")
panic("cannot write NULL to non-NULL field: " + strconv.Itoa(i))
}
}
return tb.BuildPermissive(pool)
+3 -3
View File
@@ -636,11 +636,11 @@ func (td TupleDesc) formatValue(ctx context.Context, enc Encoding, i int, value
case Hash128Enc:
return hex.EncodeToString(value)
case BytesAddrEnc:
return hex.EncodeToString(value)
return hash.New(value).String()
case StringAddrEnc:
return hex.EncodeToString(value)
return hash.New(value).String()
case CommitAddrEnc:
return hex.EncodeToString(value)
return hash.New(value).String()
case CellEnc:
return hex.EncodeToString(value)
case ExtendedEnc:
+322 -510
View File
@@ -22,12 +22,15 @@ SQL
cd $TMPDIRS/repo2
dolt init
dolt sql -q "SET @@PERSIST.dolt_stats_job_interval = 100"
dolt sql <<SQL
create table xy (x int primary key, y int, key (y,x));
create table ab (a int primary key, b int, key (b,a));
SQL
dolt sql -q "set @@PERSIST.dolt_stats_job_interval = 1;"
cd $TMPDIRS
}
@@ -38,107 +41,344 @@ teardown() {
cd $BATS_TMPDIR
}
@test "stats: empty initial stats" {
@test "stats: dolt_stats_once" {
# running once populates stats and returns valid json response
cd repo2
# disable bootstrap, can only make stats with ANALYZE or background thread
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
dolt sql -q "insert into xy values (0,0), (1,1)"
start_sql_server
sleep 1
stop_sql_server
run dolt sql -r csv -q "select count(*) from dolt_statistics"
run dolt sql -r csv -q "call dolt_stats_once()"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
# setting variables doesn't hang or error
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = .5"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
# auto refresh initialize at server startup
start_sql_server
# need to trigger at least one refresh cycle
sleep 1
# only statistics for non-empty tables are collected
run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name"
[ "$status" -eq 0 ]
[ "${lines[0]}" = "database_name,table_name,index_name" ]
[ "${lines[1]}" = "repo2,xy,primary" ]
[ "${lines[2]}" = "repo2,xy,y" ]
# appending new chunks picked up
dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;"
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "8" ]
# updates picked up
dolt sql -q "update xy set y = 2 where x between 100 and 800"
sleep 1
dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "8" ]
[[ "$output" =~ '{""dbCnt"":1,""bucketWrites"":2,""tablesProcessed"":2,""tablesSkipped"":0}"' ]] || false
}
@test "stats: bootstrap on server startup" {
@test "stats: second once does no work" {
# running once populates stats and returns valid json response
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
# disable higher precedence auto-update
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;"
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_once()"
[ "$status" -eq 0 ]
[[ "${lines[3]}" =~ '{""dbCnt"":1,""bucketWrites"":0,""tablesProcessed"":0,""tablesSkipped"":2}"' ]] || false
}
@test "stats: once after reload does no incremental work" {
# running once populates stats and returns valid json response
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -r csv -q "call dolt_stats_once();"
run dolt sql -r csv -q "call dolt_stats_once();"
[ "$status" -eq 0 ]
[[ "${lines[1]}" =~ '{""dbCnt"":1,""bucketWrites"":0,""tablesProcessed"":2,""tablesSkipped"":0}"' ]] || false
}
@test "stats: dolt_stats_wait" {
# wait stalls until stats are ready
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
run dolt sql -r csv <<EOF
call dolt_stats_restart();
call dolt_stats_wait();
select count(*) from dolt_statistics
EOF
[ "$status" -eq 0 ]
[ "${lines[5]}" = "2" ]
}
@test "stats: dolt_stats_info" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
}
@test "stats: dolt_stats_server_wait" {
# wait stalls until stats are ready
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
start_sql_server
stop_sql_server
dolt sql -r csv -q "call dolt_stats_wait()"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
}
@test "stats: auto-update on server startup" {
@test "stats: dolt_stats_server_paused" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"
dolt sql -q "set @@PERSIST.dolt_stats_paused = 1;"
start_sql_server
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r "call dolt_stats_wait()"
[ "$status" -eq 1 ]
run dolt sql -r "call dolt_stats_gc()"
[ "$status" -eq 1 ]
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
}
@test "stats: dolt_stats_purge" {
# running once populates stats and returns valid json response
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_purge(); call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "${lines[5]}" =~ '"{""dbCnt"":0,""active"":false,""storageBucketCnt"":0,""cachedBucketCnt"":0,""cachedBoundCnt"":0,""cachedTemplateCnt"":0,""statCnt"":0,""backing"":""repo2""}"' ]] || false
}
@test "stats: dolt_stats_purge server" {
cd repo2
start_sql_server
run dolt sql -q "insert into xy values (0,0), (1,1)"
sleep 1
stop_sql_server
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -q "call dolt_stats_wait()"
dolt sql -q "call dolt_stats_stop()"
dolt sql -q "call dolt_stats_purge()"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "${lines[1]}" =~ '"{""dbCnt"":0,""active"":false,""storageBucketCnt"":0,""cachedBucketCnt"":0,""cachedBoundCnt"":0,""cachedTemplateCnt"":0,""statCnt"":0,""backing"":""repo2""}"' ]] || false
}
@test "stats: dolt_stats_gc fails in shell" {
cd repo2
dolt sql <<SQL
insert into xy values (0,0), (1,1);
call dolt_stats_once();
insert into xy values (2,2), (3,3);
call dolt_stats_once();
SQL
run dolt sql -q "dolt_stats_gc()"
[ "$status" -eq 1 ]
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":0,""active"":false,""storageBucketCnt"":4,""cachedBucketCnt"":0,""cachedBoundCnt"":0,""cachedTemplateCnt"":0,""statCnt"":0,""backing"":""repo2""}"' ]] || false
}
@test "stats: dolt_stats_gc server" {
cd repo2
# only user-triggered GC's
dolt sql -q "SET @@PERSIST.dolt_stats_gc_enabled = 0"
start_sql_server
dolt sql -r csv <<SQL
insert into xy values (0,0), (1,1);
create table toDelete(i int primary key);
insert into toDelete values (5), (6);
-- invalidate previous xy buckets
call dolt_stats_wait();
call dolt_stats_info('--short');
insert into xy values (2,2), (3,3);
call dolt_add('-A');
call dolt_commit('-m', 'main branch');
-- mirror main
call dolt_checkout('-b', 'feat1');
call dolt_checkout('-b', 'feat2');
create database other;
use other;
create table ot (i int primary key);
insert into ot values (0), (1), (2);
call dolt_stats_wait();
call dolt_stats_info('--short');
SQL
# starting point
# dbs: repo2/[main, feat1, feat2], other/main
# stats: repo2:[xy,ab,toDelete]*3, other:[ot]*1
run dolt sql -r csv -q "call dolt_stats_info('--short');"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":6,""cachedBucketCnt"":6,""cachedBoundCnt"":6,""cachedTemplateCnt"":6,""statCnt"":10,""backing"":""repo2""}"' ]] || false
# clear invalid xy
dolt sql -q "call dolt_stats_gc()"
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":6,""statCnt"":10,""backing"":""repo2""}"' ]] || false
# remove toDelete table from 2/3 branches and gc
dolt sql -q "use repo2; call dolt_checkout('feat1'); drop table toDelete"
dolt sql -q "use repo2; call dolt_checkout('main'); drop table toDelete"
dolt sql -q "call dolt_stats_gc()"
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":6,""statCnt"":8,""backing"":""repo2""}"' ]] || false
# remove branch stats and gc
dolt sql -q "use repo2; call dolt_branch('-D', 'feat1', 'feat2')"
dolt sql -q "call dolt_stats_wait()"
dolt sql -q "call dolt_stats_gc()"
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":2,""active"":true,""storageBucketCnt"":3,""cachedBucketCnt"":3,""cachedBoundCnt"":3,""cachedTemplateCnt"":5,""statCnt"":3,""backing"":""repo2""}"' ]] || false
# delete whole db and gc
dolt sql -q "drop database other;"
dolt sql -q "call dolt_stats_wait()"
dolt sql -q "call dolt_stats_gc()"
dolt sql -r csv -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
}
@test "stats: delete database clean swap" {
# only user-triggered GC's
dolt sql -q "SET @@PERSIST.dolt_stats_gc_enabled = 0"
# don't start server in repo2, the shell->server access
# breaks when you delete the primary database
start_sql_server
dolt sql -r csv <<SQL
use repo2;
insert into xy values (0,0), (1,1);
create database other;
use other;
create table ot (i int primary key);
insert into ot values (0), (1), (2);
call dolt_stats_wait();
use other;
drop database repo2;
drop database repo1;
call dolt_stats_gc();
SQL
# other still exists
dolt sql -q "call dolt_stats_info('--short');"
run dolt sql -r csv -q "call dolt_stats_info('--short');"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":1,""cachedBucketCnt"":1,""cachedBoundCnt"":1,""cachedTemplateCnt"":1,""statCnt"":1,""backing"":""other""}"' ]] || false
}
@test "stats: multiple stats dbs at start is OK" {
cd repo2
dolt sql -q "insert into xy values (0,0)"
dolt sql -q "insert into ab values (0,0)"
dolt sql -q "call dolt_stats_once()"
cd ../repo1
dolt sql -q "insert into ab values (0,0)"
dolt sql -q "call dolt_stats_once()"
cd ..
start_sql_server
dolt sql -q "call dolt_stats_wait();"
dolt sql -q "call dolt_stats_info('--short');"
run dolt sql -r csv -q "call dolt_stats_info('--short');"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":2,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":3,""backing"":""repo1""}"' ]] || false
}
@test "stats: dolt_stats_stop_restart" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
start_sql_server
dolt sql -r csv -q "call dolt_stats_wait()"
# server running stats by default
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
# stop turns stats off
dolt sql -q "call dolt_stats_stop()"
dolt sql -r csv -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
# don't pick up changes when stopped
dolt sql -q "insert into xy values (2,2), (4,4)"
run dolt sql -r csv -q "call dolt_stats_wait()"
[ "$status" -eq 1 ]
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
dolt sql -r csv -q "call dolt_stats_restart()"
dolt sql -r csv -q "call dolt_stats_wait()"
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
}
@test "stats: memory only doesn't write to disk" {
cd repo2
dolt sql -q "set @@PERSIST.dolt_stats_memory_only = 1"
start_sql_server
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -q "call dolt_stats_once()"
dolt sql -q "call dolt_stats_info('--short')"
run dolt sql -r csv -q "call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":0,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""memory""}"' ]] || false
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
stop_sql_server
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_info('--short')"
[ "$status" -eq 0 ]
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":0,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""memory""}"' ]] || false
}
@test "stats: only bootstrap server startup" {
@test "stats: waiters error for closed stats queue" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -q "analyze table xy"
dolt gc
run dolt sql -q "call dolt_stats_gc()"
[ "$status" -eq 1 ]
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
run dolt sql -q "call dolt_stats_wait()"
[ "$status" -eq 1 ]
run dolt sql -q "call dolt_stats_flush()"
[ "$status" -eq 1 ]
}
@test "stats: encode/decode loop is delimiter safe" {
@@ -147,12 +387,11 @@ teardown() {
dolt sql <<EOF
create table uv (u varbinary(255) primary key);
insert into uv values ('hello, world');
analyze table uv;
EOF
run dolt sql -r csv -q "select count(*) from dolt_statistics"
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1" ]
[ "${lines[3]}" = "1" ]
}
@test "stats: correct stats directory location, issue#8324" {
@@ -167,6 +406,9 @@ EOF
run stat .dolt/repo2
[ "$status" -eq 1 ]
run stat .dolt/stats/.dolt
[ "$status" -eq 0 ]
}
@test "stats: restart in shell doesn't drop db, issue#8345" {
@@ -174,492 +416,62 @@ EOF
dolt sql -q "insert into xy values (0,0), (1,1), (2,2), (3,3), (4,4)"
dolt sql -q "insert into ab values (0,0), (1,1), (2,2), (3,3), (4,4)"
dolt sql -q "ANALYZE table xy, ab"
run dolt sql -r csv <<EOF
call dolt_stats_once();
select count(*) from dolt_statistics;
set @@GLOBAL.dolt_stats_auto_refresh_interval = 2;
call dolt_stats_restart();
select count(*) from dolt_statistics;
select sleep(3);
call dolt_stats_wait();
select count(*) from dolt_statistics;
EOF
[ "${lines[1]}" = "4" ]
[ "${lines[5]}" = "4" ]
[ "${lines[9]}" = "4" ]
[ "${lines[3]}" = "4" ]
[ "${lines[7]}" = "4" ]
[ "${lines[11]}" = "4" ]
[ "$status" -eq 0 ]
}
@test "stats: stats roundtrip restart" {
cd repo2
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
dolt sql -q "insert into xy values (0,0), (1,1)"
# make sure no stats
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
# add stats while server is running
start_sql_server
dolt sql -q "call dolt_stats_restart()"
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
stop_sql_server
# make sure restarted server sees same stats
start_sql_server
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
stop_sql_server
}
@test "stats: deletes refresh" {
cd repo2
dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;"
# setting variables doesn't hang or error
dolt sql -q "set @@persist.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "set @@persist.dolt_stats_auto_refresh_threshold = .5"
dolt sql -q "set @@persist.dolt_stats_auto_refresh_interval = 1;"
start_sql_server
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "8" ]
# delete >50% of rows
dolt sql -q "delete from xy where x > 600"
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "4" ]
}
@test "stats: dolt_state_purge cli" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
dolt sql -q "analyze table xy"
#start_sql_server
#sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
dolt sql -q "call dolt_stats_purge()"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
}
@test "stats: dolt_state_purge server" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
start_sql_server
sleep 1
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
dolt sql -q "call dolt_stats_purge()"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
stop_sql_server
}
@test "stats: dolt_state_prune cli" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
dolt sql -q "analyze table xy"
#start_sql_server
#sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
dolt sql -q "call dolt_stats_prune()"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
}
@test "stats: dolt_state_prune server" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
start_sql_server
sleep 1
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
dolt sql -q "call dolt_stats_prune()"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
stop_sql_server
}
@test "stats: add/delete table" {
cd repo1
dolt sql -q "insert into ab values (0,0), (1,0), (2,0)"
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
start_sql_server
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
# add table
dolt sql -q "create table xy (x int primary key, y int)"
# schema changes don't impact the table hash
dolt sql -q "insert into xy values (0,0)"
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1" ]
dolt sql -q "truncate table xy"
sleep 1
dolt sql -q "select * from xy"
dolt sql -q "select * from dolt_statistics where table_name = 'xy'"
run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
dolt sql -q "drop table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
}
@test "stats: add/delete index" {
cd repo2
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
start_sql_server
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
# delete secondary
dolt sql -q "alter table xy drop index y"
# schema changes don't impact the table hash
dolt sql -q "insert into xy values (3,0)"
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1" ]
dolt sql -q "alter table xy add index yx (y,x)"
# row change to impact table hash
dolt sql -q "insert into xy values (4,0)"
sleep 1
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
}
@test "stats: most common values" {
cd repo2
dolt sql -q "alter table xy add index y2 (y)"
dolt sql -q "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)"
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select mcv1, mcv2 from dolt_statistics where index_name = 'y2'"
run dolt sql -r csv -q "call dolt_stats_once(); select mcv1, mcv2 from dolt_statistics where index_name = 'y2'"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1,0" ]
}
@test "stats: multi db" {
cd repo1
dolt sql -q "insert into ab values (0,0), (1,1)"
cd ../repo2
dolt sql -q "insert into ab values (0,0), (1,1)"
dolt sql -q "insert into xy values (0,0), (1,1)"
cd ..
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = 0.5"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
start_sql_server
sleep 1
dolt sql -q "use repo1"
run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name"
[ "$status" -eq 0 ]
[ "${lines[0]}" = "database_name,table_name,index_name" ]
[ "${lines[1]}" = "repo1,ab,b" ]
[ "${lines[2]}" = "repo1,ab,primary" ]
run dolt sql -r csv -q "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name"
[ "$status" -eq 0 ]
[ "${lines[0]}" = "database_name,table_name,index_name" ]
[ "${lines[1]}" = "repo2,ab,b" ]
[ "${lines[2]}" = "repo2,ab,primary" ]
[ "${lines[3]}" = "repo2,xy,primary" ]
[ "${lines[4]}" = "repo2,xy,y" ]
}
@test "stats: add/delete database" {
cd repo1
# setting variables doesn't hang or error
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
start_sql_server
dolt sql -q "insert into ab values (0,0), (1,0), (2,0)"
dolt sql <<SQL
create database repo2;
create table repo2.xy (x int primary key, y int, key(y,x));
insert into repo2.xy values (0,0), (1,0), (2,0);
SQL
sleep 1
# specify database_name filter even though can only see active db stats
run dolt sql -r csv <<SQL
use repo2;
select count(*) from dolt_statistics where database_name = 'repo2';
SQL
[ "$status" -eq 0 ]
[ "${lines[2]}" = "2" ]
# drop repo2
dolt sql -q "drop database repo2"
sleep 1
# we can't access repo2 stats, but still try
run dolt sql -r csv <<SQL
select count(*) from dolt_statistics where database_name = 'repo2';
SQL
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
dolt sql <<SQL
create database repo2;
create table repo2.xy (x int primary key, y int, key(y,x));
SQL
sleep 1
# no rows yet
run dolt sql -r csv <<SQL
use repo2;
select count(*) from dolt_statistics where database_name = 'repo2';
SQL
[ "$status" -eq 0 ]
[ "${lines[2]}" = "0" ]
dolt sql <<SQL
use repo2;
insert into xy values (0,0);
analyze table xy;
SQL
sleep 1
# insert initializes stats
run dolt sql -r csv <<SQL
use repo2;
select count(*) from dolt_statistics where database_name = 'repo2';
SQL
[ "$status" -eq 0 ]
[ "${lines[2]}" = "2" ]
}
# bats test_tags=no_lambda
@test "stats: boostrap abort over 1mm rows" {
cat <<EOF > data.py
import random
import os
rows = 2*1000*1000+1
def main():
f = open("data.csv","w+")
f.write("id,hostname\n")
for i in range(rows):
hostname = random.getrandbits(100)
f.write(f"{i},{hostname}\n")
if i % (500*1000) == 0:
print("row :", i)
f.flush()
f.close()
if __name__ == "__main__":
main()
EOF
mkdir repo3
cd repo3
python3 ../data.py
dolt init
dolt sql -q "create table f (id int primary key, hostname int)"
dolt table import -u --continue f data.csv
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false
[ "${lines[2]}" = "0" ]
[ "${lines[3]}" = "1,0" ]
}
@test "stats: stats delete index schema change" {
cd repo2
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -q "analyze table xy"
# stats OK after analyze
run dolt sql -r csv -q "select count(*) from dolt_statistics"
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
[ "${lines[3]}" = "2" ]
dolt sql -q "alter table xy drop index y"
# load after schema change should purge
run dolt sql -r csv -q "select count(*) from dolt_statistics"
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1" ]
[ "${lines[3]}" = "1" ]
}
@test "stats: stats recreate table without index" {
cd repo2
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
dolt sql -q "insert into xy values (0,0), (1,1)"
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
dolt sql -q "call dolt_stats_once()"
dolt sql -q "drop table xy"
dolt sql -q "create table xy (x int primary key, y int)"
dolt sql -q "insert into xy values (0,0), (1,1)"
# make sure no stats
run dolt sql -r csv -q "select count(*) from dolt_statistics"
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]
dolt sql -q "analyze table xy"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1" ]
stop_sql_server
[ "${lines[3]}" = "1" ]
}