mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-13 03:10:03 -05:00
[stats] Rewrite stat management to use single threaded event loop (#8815)
* [stats] event loop * more progress * basic scheduler test working * analyze * add/drop hooks * gc * delete an alter * drop index and table * fix other tests * branch management * starter for kv * gc and refactor maintanance * fix bucket doubling * delete log * better bucket counting * test for disk round trip * more prolly stats gc tests * rotate backing stats db * progress towards swapping old for new, deleting old code * fix gc bucket overflow * test for gc overflow * org and closers * save progress update * finally get first two bats running * startup bound hash issue * rewrite GC to be synchronous, fix more bugs * fix session freshness * fix branch gc * cache writes and gc are serialized * fix gc/branch update dropped hashes * fix gc race, doubling race, jobs race * fix more races * docs * convert bats to script tests * more tests, purge/stop * validate * docs * some PR cleanup * more cleanup * stash for pull * fix bucket hash conflicts * Fix more collection bugs. * bump, timer proc * more test fixes * cache bats changes * Another deadlock * delete comment * fmt * no read replica stats * fix plan tests * branch qualified analyze fix * [no-release-notes] go: statspro/jobqueue: Create a SerialQueue, which can perform asynchronous work on a worker thread. * go: statspro/jobqueue: A bit of cleanup, fix a flakey test. 
* rewrite with GDQ * prog * tests run * fix info and storage * outline for gc impl * fix tests and races * bump * better error and panic management * better start/stop/wait * Add rate limiting * gc ticker * docs * doc * test prog * fix more tests * finish up listener tests * add comments * gc concurrency * enginetests and statspro tests passing * simplify listeners * bats progress * small edits * tests progress * bats are running * fmt * build * edits * fix interface * fix build * stats alternate index types * fix mem test * build * fix more tests * fmt * more fmt * copyright * license * fix races * syntax error * fix windows path * nil mcv panic * fix test races * bump def job interval to 30ms * deterministic tests * more tests * TEMP COMMIT: valctx plus some other stuff... * shorter concurrency tests * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh * nondeterministic test * try to make queue tests less racy * missed one start * stats granular session locks * simplify a little * try to avoid serialq test deadlock * try to fix flakes * more races * bump * another race * cleanup * more cleanup * revert ctx validation * most zach comments * more comments * more race * bump * more race * bump * schemas * skip windows racees * standardize server config init, use background threads management * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh * default stats noop * threads management improvements * undo change * move stats initialization back to engine * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --------- Co-authored-by: Aaron Son <aaron@dolthub.com> Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
0dd4217f60
commit
b8b2ff1c99
Generated
-33
@@ -16782,39 +16782,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
= LICENSE 3565fbf999a10a748647f3a2f7ff9f5dfcf1af7502a30f860ef0bf98 =
|
||||
================================================================================
|
||||
|
||||
================================================================================
|
||||
= gopkg.in/errgo.v2 licensed under: =
|
||||
|
||||
Copyright © 2013, Roger Peppe
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of this project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
= LICENSE fdb54eb3c3cf061a91aac42ab8e6578c3c69de803c2becb0d86810a5 =
|
||||
================================================================================
|
||||
|
||||
================================================================================
|
||||
= gopkg.in/go-jose/go-jose.v2 licensed under: =
|
||||
|
||||
|
||||
@@ -310,8 +310,14 @@ func relateCommitToParentChunks(ctx context.Context, commit hash.Hash, groupings
|
||||
|
||||
from, to, err := delta.GetRowData(ctx)
|
||||
|
||||
f := durable.ProllyMapFromIndex(from)
|
||||
t := durable.ProllyMapFromIndex(to)
|
||||
f, err := durable.ProllyMapFromIndex(from)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t, err := durable.ProllyMapFromIndex(to)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if f.Node().Level() != t.Node().Level() {
|
||||
continue
|
||||
|
||||
@@ -16,7 +16,6 @@ package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -45,7 +44,6 @@ import (
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/config"
|
||||
@@ -85,6 +83,7 @@ type SqlEngineConfig struct {
|
||||
AutoGCController *dsqle.AutoGCController
|
||||
BinlogReplicaController binlogreplication.BinlogReplicaController
|
||||
EventSchedulerStatus eventscheduler.SchedulerStatus
|
||||
StatsController sql.StatsProvider
|
||||
}
|
||||
|
||||
// NewSqlEngine returns a SqlEngine
|
||||
@@ -201,9 +200,6 @@ func NewSqlEngine(
|
||||
"authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig),
|
||||
})
|
||||
|
||||
statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider()))
|
||||
engine.Analyzer.Catalog.StatsProvider = statsPro
|
||||
|
||||
if config.AutoGCController != nil {
|
||||
err = config.AutoGCController.RunBackgroundThread(bThreads, sqlEngine.NewDefaultContext)
|
||||
if err != nil {
|
||||
@@ -216,8 +212,15 @@ func NewSqlEngine(
|
||||
dprocedures.UseSessionAwareSafepointController = true
|
||||
}
|
||||
|
||||
_, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled)
|
||||
if enabled.(int8) == 1 {
|
||||
config.StatsController = statspro.NewStatsController(logrus.StandardLogger(), mrEnv.GetEnv(mrEnv.GetFirstDatabase()))
|
||||
} else {
|
||||
config.StatsController = statspro.StatsNoop{}
|
||||
}
|
||||
|
||||
engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
|
||||
sessFactory := doltSessionFactory(pro, statsPro, mrEnv.Config(), bcController, gcSafepointController, config.Autocommit)
|
||||
sessFactory := doltSessionFactory(pro, config.StatsController, mrEnv.Config(), bcController, gcSafepointController, config.Autocommit)
|
||||
sqlEngine.provider = pro
|
||||
sqlEngine.contextFactory = sqlContextFactory
|
||||
sqlEngine.dsessFactory = sessFactory
|
||||
@@ -236,8 +239,28 @@ func NewSqlEngine(
|
||||
|
||||
// configuring stats depends on sessionBuilder
|
||||
// sessionBuilder needs ref to statsProv
|
||||
if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil {
|
||||
fmt.Fprintln(cli.CliErr, err)
|
||||
if sc, ok := config.StatsController.(*statspro.StatsController); ok {
|
||||
_, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly)
|
||||
sc.SetMemOnly(memOnly.(int8) == 1)
|
||||
|
||||
pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, statspro.NewInitDatabaseHook(sc))
|
||||
pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, statspro.NewDropDatabaseHook(sc))
|
||||
|
||||
var sqlDbs []sql.Database
|
||||
for _, db := range dbs {
|
||||
sqlDbs = append(sqlDbs, db)
|
||||
}
|
||||
|
||||
err = sc.Init(ctx, pro, sqlEngine.NewDefaultContext, bThreads, sqlDbs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, paused, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsPaused); paused.(int8) == 0 {
|
||||
if err = sc.Restart(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load MySQL Db information
|
||||
|
||||
@@ -303,17 +303,9 @@ func ConfigureServices(
|
||||
var sqlEngine *engine.SqlEngine
|
||||
InitSqlEngine := &svcs.AnonService{
|
||||
InitF: func(ctx context.Context) (err error) {
|
||||
if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil {
|
||||
// Auto-stats is off by default for every command except
|
||||
// sql-server. Unless the config specifies a specific
|
||||
// behavior, enable server stats collection.
|
||||
sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1)
|
||||
} else if statsOn != "0" {
|
||||
// do not bootstrap if auto-stats enabled
|
||||
} else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil {
|
||||
// If we've disabled stats collection and config does not
|
||||
// specify bootstrap behavior, enable bootstrapping.
|
||||
sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1)
|
||||
if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsPaused); err != nil {
|
||||
// unless otherwise specified, run stats writer alongside server
|
||||
sql.SystemVariables.SetGlobal(dsess.DoltStatsPaused, 0)
|
||||
}
|
||||
sqlEngine, err = engine.NewSqlEngine(
|
||||
ctx,
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
package sqlserver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -184,11 +185,6 @@ func TestServerBadArgs(t *testing.T) {
|
||||
|
||||
func TestServerGoodParams(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
env, err := sqle.CreateEnvWithSeedData()
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
assert.NoError(t, env.DoltDB(ctx).Close())
|
||||
}()
|
||||
|
||||
tests := []servercfg.ServerConfig{
|
||||
DefaultCommandLineServerConfig(),
|
||||
@@ -210,6 +206,11 @@ func TestServerGoodParams(t *testing.T) {
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(servercfg.ConfigInfo(test), func(t *testing.T) {
|
||||
env, err := sqle.CreateEnvWithSeedData()
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
assert.NoError(t, env.DoltDB(ctx).Close())
|
||||
}()
|
||||
sc := svcs.NewController()
|
||||
go func(config servercfg.ServerConfig, sc *svcs.Controller) {
|
||||
_, _ = Serve(context.Background(), &Config{
|
||||
@@ -219,7 +220,7 @@ func TestServerGoodParams(t *testing.T) {
|
||||
DoltEnv: env,
|
||||
})
|
||||
}(test, sc)
|
||||
err := sc.WaitForStart()
|
||||
err = sc.WaitForStart()
|
||||
require.NoError(t, err)
|
||||
conn, err := dbr.Open("mysql", servercfg.ConnectionString(test, "dbname"), nil)
|
||||
require.NoError(t, err)
|
||||
@@ -228,6 +229,7 @@ func TestServerGoodParams(t *testing.T) {
|
||||
sc.Stop()
|
||||
err = sc.WaitForStop()
|
||||
assert.NoError(t, err)
|
||||
fmt.Println("stop server")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ require (
|
||||
github.com/creasty/defaults v1.6.0
|
||||
github.com/dolthub/aws-sdk-go-ini-parser v0.0.0-20250305001723-2821c37f6c12
|
||||
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af
|
||||
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
|
||||
github.com/esote/minmaxheap v1.0.0
|
||||
github.com/goccy/go-json v0.10.2
|
||||
@@ -93,7 +93,6 @@ require (
|
||||
go.opentelemetry.io/otel/trace v1.32.0
|
||||
golang.org/x/text v0.22.0
|
||||
gonum.org/v1/plot v0.11.0
|
||||
gopkg.in/errgo.v2 v2.1.0
|
||||
gopkg.in/go-jose/go-jose.v2 v2.6.3
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
@@ -221,8 +221,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
|
||||
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
|
||||
github.com/dolthub/go-icu-regex v0.0.0-20250319212010-451ea8d003fa h1:NFbzJ4wjWRz32nz2EimbrHpRx1Xt6k+IaR8N+j4x62k=
|
||||
github.com/dolthub/go-icu-regex v0.0.0-20250319212010-451ea8d003fa/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d h1:Ra9hv9fvJkSvjihPmtQB4EMGhq9qNp08gUI/mRmF9no=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250320042421-9a6edfcfab0d/go.mod h1:9itIc5jYYDRxmchFmegPaLaqdf4XWYX6nua5HhrajgA=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af h1:ozgYo2hKV6uQqLxZTS+QElHTaZ8mMiKOln25jZI1gVc=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250320173422-cce3ea1590af/go.mod h1:9itIc5jYYDRxmchFmegPaLaqdf4XWYX6nua5HhrajgA=
|
||||
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
|
||||
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
|
||||
github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE=
|
||||
@@ -1186,7 +1186,6 @@ gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
|
||||
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
|
||||
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
||||
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o=
|
||||
|
||||
@@ -386,6 +386,12 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumC
|
||||
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250228215144-f8da474ecd9f h1:lwQH9jVmSiPg1DFMYB9rWyyJTMPMoBpGrYRsOGOD/hA=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250228215144-f8da474ecd9f/go.mod h1:JTlrabhq5TJqvlL+J3NKlm0EzTHQQugUAH6yAxWi4Ww=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250305230031-14a57e076a0a h1:lemFIUt0NCKIeX7vnU2yKF8UIgc0DT8zIoEUn7oy+60=
|
||||
github.com/dolthub/go-mysql-server v0.19.1-0.20250305230031-14a57e076a0a/go.mod h1:yr+Vv47/YLOKMgiEY+QxHTlbIVpTuiVtkEZ5l+xruY4=
|
||||
github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577 h1:SegEguMxToBn045KRHLIUlF2/jR7Y2qD6fF+3tdOfvI=
|
||||
github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4=
|
||||
github.com/dolthub/swiss v0.1.0 h1:EaGQct3AqeP/MjASHLiH6i4TAmgbG/c4rA6a1bzCOPc=
|
||||
github.com/dolthub/swiss v0.1.0/go.mod h1:BeucyB08Vb1G9tumVN3Vp/pyY4AMUnr9p7Rz7wJ7kAQ=
|
||||
github.com/dolthub/vitess v0.0.0-20241104125316-860772ba6683 h1:2/RJeUfNAXS7mbBnEr9C36htiCJHk5XldDPzhxtEsME=
|
||||
github.com/dolthub/vitess v0.0.0-20241104125316-860772ba6683/go.mod h1:uBvlRluuL+SbEWTCZ68o0xvsdYZER3CEG/35INdzfJM=
|
||||
github.com/dolthub/vitess v0.0.0-20241231200706-18992bb25fdc/go.mod h1:1gQZs/byeHLMSul3Lvl3MzioMtOW1je79QYGyi2fd70=
|
||||
@@ -651,6 +657,8 @@ github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/sts v1.0.588/go.mod h1:
|
||||
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/tag v1.0.233/go.mod h1:sX14+NSvMjOhNFaMtP2aDy6Bss8PyFXij21gpY6+DAs=
|
||||
github.com/tencentyun/cos-go-sdk-v5 v0.7.42/go.mod h1:LUFnaqRmGk6pEHOaRmdn2dCZR2j0cSsM5xowWFPTPao=
|
||||
github.com/thanhpk/randstr v1.0.4/go.mod h1:M/H2P1eNLZzlDwAzpkkkUvoyNNMbzRGhESZuEQk3r0U=
|
||||
github.com/thepudds/swisstable v0.0.0-20221011152303-9c77dc657777 h1:5u+6YWU2faS+Sr/x8j9yalMpSDUkatNOZWXV3wMUCGQ=
|
||||
github.com/thepudds/swisstable v0.0.0-20221011152303-9c77dc657777/go.mod h1:4af3KxEsswy6aTzsTcwa8QZUSh4V+80oHdp1QX9uJHA=
|
||||
github.com/thlib/go-timezone-local v0.0.0-20210907160436-ef149e42d28e/go.mod h1:/Tnicc6m/lsJE0irFMA0LfIwTBo4QP7A8IfyIv4zZKI=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8 h1:ndzgwNDnKIqyCvHTXaCqh9KlOWKvBry6nuXMJmonVsE=
|
||||
github.com/tombuildsstuff/giovanni v0.15.1/go.mod h1:0TZugJPEtqzPlMpuJHYfXY6Dq2uLPrXf98D2XQSxNbA=
|
||||
@@ -740,6 +748,7 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
|
||||
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
|
||||
gopkg.in/cheggaaa/pb.v1 v1.0.25 h1:Ev7yu1/f6+d+b3pi5vPdRPc6nNtP1umSfcWiEfRqv6I=
|
||||
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
|
||||
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
|
||||
gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs=
|
||||
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
||||
|
||||
@@ -105,11 +105,16 @@ func diffProllyTrees(ctx context.Context, ch chan DiffStatProgress, keyless bool
|
||||
|
||||
var f, t prolly.Map
|
||||
if from != nil {
|
||||
f = durable.ProllyMapFromIndex(from)
|
||||
f, err = durable.ProllyMapFromIndex(from)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if to != nil {
|
||||
t = durable.ProllyMapFromIndex(to)
|
||||
|
||||
t, err = durable.ProllyMapFromIndex(to)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
_, fVD := f.Descriptors()
|
||||
|
||||
@@ -2110,8 +2110,8 @@ func (ddb *DoltDB) AddStash(ctx context.Context, head *Commit, stash RootValue,
|
||||
return err
|
||||
}
|
||||
|
||||
func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Hash) error {
|
||||
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
|
||||
func (ddb *DoltDB) SetStatistics(ctx context.Context, branch string, addr hash.Hash) error {
|
||||
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -2119,8 +2119,8 @@ func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Ha
|
||||
return err
|
||||
}
|
||||
|
||||
func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error {
|
||||
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
|
||||
func (ddb *DoltDB) DropStatisics(ctx context.Context) error {
|
||||
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
|
||||
|
||||
_, err = ddb.db.Delete(ctx, statsDs, "")
|
||||
if err != nil {
|
||||
@@ -2132,8 +2132,8 @@ func (ddb *DoltDB) DropStatisics(ctx context.Context, branch string) error {
|
||||
var ErrNoStatistics = errors.New("no statistics found")
|
||||
|
||||
// GetStatistics returns the value of the singleton ref.StatsRef for this database
|
||||
func (ddb *DoltDB) GetStatistics(ctx context.Context, branch string) (prolly.Map, error) {
|
||||
ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
|
||||
func (ddb *DoltDB) GetStatistics(ctx context.Context) (prolly.Map, error) {
|
||||
ds, err := ddb.db.GetDataset(ctx, ref.NewStatsRef().String())
|
||||
if err != nil {
|
||||
return prolly.Map{}, err
|
||||
}
|
||||
|
||||
@@ -273,8 +273,13 @@ type prollyIndex struct {
|
||||
}
|
||||
|
||||
// ProllyMapFromIndex unwraps the Index and returns the underlying prolly.Map.
|
||||
func ProllyMapFromIndex(i Index) prolly.Map {
|
||||
return i.(prollyIndex).index
|
||||
func ProllyMapFromIndex(i Index) (prolly.Map, error) {
|
||||
switch i := i.(type) {
|
||||
case prollyIndex:
|
||||
return i.index, nil
|
||||
default:
|
||||
return prolly.Map{}, fmt.Errorf("expected prollyIndex, found: %T", i)
|
||||
}
|
||||
}
|
||||
|
||||
// xxx: don't use this, temporary fix waiting for bigger
|
||||
@@ -369,7 +374,10 @@ func (i prollyIndex) AddColumnToRows(ctx context.Context, newCol string, newSche
|
||||
}
|
||||
|
||||
// If not, then we have to iterate over this table's rows and update all the offsets for the new column
|
||||
rowMap := ProllyMapFromIndex(i)
|
||||
rowMap, err := ProllyMapFromIndex(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mutator := rowMap.Mutate()
|
||||
|
||||
iter, err := mutator.IterAll(ctx)
|
||||
|
||||
@@ -295,7 +295,10 @@ func createRowIterForTable(ctx *sql.Context, t *doltdb.Table, sch schema.Schema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rows := durable.ProllyMapFromIndex(rowData)
|
||||
rows, err := durable.ProllyMapFromIndex(rowData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rowCount, err := rows.Count()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -145,7 +145,10 @@ func (table *fulltextTable) ApplyToTable(ctx *sql.Context) (*doltdb.Table, error
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
keyDesc, valDesc := m.Descriptors()
|
||||
keyMap, valMap := ordinalMappingsFromSchema(table.SqlSch, table.Sch)
|
||||
mut := m.Mutate()
|
||||
|
||||
@@ -403,7 +403,7 @@ func assertNomsConflicts(t *testing.T, ctx context.Context, tbl *doltdb.Table, e
|
||||
func mustGetRowValueFromTable(t *testing.T, ctx context.Context, tbl *doltdb.Table, key val.Tuple) val.Tuple {
|
||||
idx, err := tbl.GetRowData(ctx)
|
||||
require.NoError(t, err)
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, _ := durable.ProllyMapFromIndex(idx)
|
||||
|
||||
var value val.Tuple
|
||||
err = m.Get(ctx, key, func(_, v val.Tuple) error {
|
||||
@@ -438,7 +438,7 @@ func assertKeylessRows(t *testing.T, ctx context.Context, tbl *doltdb.Table, exp
|
||||
func assertKeylessProllyRows(t *testing.T, ctx context.Context, tbl *doltdb.Table, expected []keylessEntry) {
|
||||
idx, err := tbl.GetRowData(ctx)
|
||||
require.NoError(t, err)
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, _ := durable.ProllyMapFromIndex(idx)
|
||||
|
||||
expectedSet := mustHash128Set(expected...)
|
||||
|
||||
|
||||
@@ -50,7 +50,10 @@ func mergeProllySecondaryIndexes(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mergedM := durable.ProllyMapFromIndex(finalRows)
|
||||
mergedM, err := durable.ProllyMapFromIndex(finalRows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tryGetIdx := func(sch schema.Schema, iS durable.IndexSet, indexName string) (prolly.Map, bool, error) {
|
||||
ok := sch.Indexes().Contains(indexName)
|
||||
@@ -59,7 +62,10 @@ func mergeProllySecondaryIndexes(
|
||||
if err != nil {
|
||||
return prolly.Map{}, false, err
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return prolly.Map{}, false, err
|
||||
}
|
||||
return m, true, nil
|
||||
}
|
||||
return prolly.Map{}, false, nil
|
||||
|
||||
@@ -73,7 +73,10 @@ func mergeProllyTable(
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
leftRows := durable.ProllyMapFromIndex(lr)
|
||||
leftRows, err := durable.ProllyMapFromIndex(lr)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
valueMerger := newValueMerger(mergedSch, tm.leftSch, tm.rightSch, tm.ancSch, leftRows.Pool(), tm.ns)
|
||||
|
||||
if !valueMerger.leftMapping.IsIdentityMapping() {
|
||||
@@ -130,7 +133,11 @@ func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Sch
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
leftEditor := durable.ProllyMapFromIndex(lr).Rewriter(finalSch.GetKeyDescriptor(ns), finalSch.GetValueDescriptor(ns))
|
||||
lIdx, err := durable.ProllyMapFromIndex(lr)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
leftEditor := lIdx.Rewriter(finalSch.GetKeyDescriptor(ns), finalSch.GetValueDescriptor(ns))
|
||||
|
||||
ai, err := mergeTbl.GetArtifacts(ctx)
|
||||
if err != nil {
|
||||
@@ -331,19 +338,27 @@ func threeWayDiffer(ctx context.Context, tm *TableMerger, valueMerger *valueMerg
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
leftRows := durable.ProllyMapFromIndex(lr)
|
||||
leftRows, err := durable.ProllyMapFromIndex(lr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rr, err := tm.rightTbl.GetRowData(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rightRows := durable.ProllyMapFromIndex(rr)
|
||||
|
||||
rightRows, err := durable.ProllyMapFromIndex(rr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ar, err := tm.ancTbl.GetRowData(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ancRows := durable.ProllyMapFromIndex(ar)
|
||||
ancRows, err := durable.ProllyMapFromIndex(ar)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tree.NewThreeWayDiffer(
|
||||
ctx,
|
||||
@@ -534,7 +549,10 @@ func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm *
|
||||
if err != nil {
|
||||
return uniqValidator{}, err
|
||||
}
|
||||
clustered := durable.ProllyMapFromIndex(rows)
|
||||
clustered, err := durable.ProllyMapFromIndex(rows)
|
||||
if err != nil {
|
||||
return uniqValidator{}, err
|
||||
}
|
||||
|
||||
indexes, err := tm.leftTbl.GetIndexSet(ctx)
|
||||
if err != nil {
|
||||
@@ -552,7 +570,10 @@ func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm *
|
||||
if err != nil {
|
||||
return uniqValidator{}, err
|
||||
}
|
||||
secondary := durable.ProllyMapFromIndex(idx)
|
||||
secondary, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return uniqValidator{}, err
|
||||
}
|
||||
|
||||
u, err := newUniqIndex(ctx, sch, tm.name.Name, def, clustered, secondary)
|
||||
if err != nil {
|
||||
|
||||
@@ -332,14 +332,18 @@ func TestMergeCommits(t *testing.T) {
|
||||
artifacts := durable.ProllyMapFromArtifactIndex(artIdx)
|
||||
MustEqualArtifactMap(t, expectedArtifacts, artifacts)
|
||||
|
||||
MustEqualProlly(t, tableName, durable.ProllyMapFromIndex(expectedRows), durable.ProllyMapFromIndex(mergedRows))
|
||||
idx1, _ := durable.ProllyMapFromIndex(expectedRows)
|
||||
idx2, _ := durable.ProllyMapFromIndex(mergedRows)
|
||||
MustEqualProlly(t, tableName, idx1, idx2)
|
||||
|
||||
for _, index := range sch.Indexes().AllIndexes() {
|
||||
mergedIndexRows, err := merged.table.GetIndexRowData(ctx, index.Name())
|
||||
require.NoError(t, err)
|
||||
expectedIndexRows, err := expected.GetIndexRowData(ctx, index.Name())
|
||||
require.NoError(t, err)
|
||||
MustEqualProlly(t, index.Name(), durable.ProllyMapFromIndex(expectedIndexRows), durable.ProllyMapFromIndex(mergedIndexRows))
|
||||
idx1, _ := durable.ProllyMapFromIndex(expectedIndexRows)
|
||||
idx2, _ := durable.ProllyMapFromIndex(mergedIndexRows)
|
||||
MustEqualProlly(t, index.Name(), idx1, idx2)
|
||||
}
|
||||
|
||||
h, err := merged.table.HashOf()
|
||||
@@ -635,7 +639,7 @@ func rebuildAllProllyIndexes(ctx *sql.Context, tbl *doltdb.Table) (*doltdb.Table
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
primary := durable.ProllyMapFromIndex(tableRowData)
|
||||
primary, _ := durable.ProllyMapFromIndex(tableRowData)
|
||||
|
||||
for _, index := range sch.Indexes().AllIndexes() {
|
||||
rebuiltIndexRowData, err := creation.BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, index, primary)
|
||||
|
||||
@@ -35,7 +35,10 @@ func GetMutableSecondaryIdxs(ctx *sql.Context, ourSch, sch schema.Schema, tableN
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mods[i], err = NewMutableSecondaryIdx(ctx, m, ourSch, sch, tableName, index)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -68,7 +71,10 @@ func GetMutableSecondaryIdxsWithPending(ctx *sql.Context, ns tree.NodeStore, our
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If the schema has changed, don't reuse the index.
|
||||
// TODO: This isn't technically required, but correctly handling updating secondary indexes when only some
|
||||
|
||||
@@ -361,7 +361,10 @@ func parentFkConstraintViolations(
|
||||
return nomsParentFkConstraintViolations(ctx, vr, foreignKey, postParent, postChild, preParent.Schema, m, receiver)
|
||||
}
|
||||
if preParent.IndexData == nil || postParent.Schema.GetPKCols().Size() == 0 || preParent.Schema.GetPKCols().Size() == 0 {
|
||||
m := durable.ProllyMapFromIndex(preParentRowData)
|
||||
m, err := durable.ProllyMapFromIndex(preParentRowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return prollyParentPriDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
|
||||
}
|
||||
empty, err := preParentRowData.Empty()
|
||||
@@ -377,7 +380,10 @@ func parentFkConstraintViolations(
|
||||
} else {
|
||||
idx = preParent.IndexData
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return prollyParentSecDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
|
||||
}
|
||||
|
||||
@@ -396,7 +402,10 @@ func childFkConstraintViolations(
|
||||
return nomsChildFkConstraintViolations(ctx, vr, foreignKey, postParent, postChild, preChild.Schema, m, receiver)
|
||||
}
|
||||
if preChild.IndexData == nil || postChild.Schema.GetPKCols().Size() == 0 || preChild.Schema.GetPKCols().Size() == 0 {
|
||||
m := durable.ProllyMapFromIndex(preChildRowData)
|
||||
m, err := durable.ProllyMapFromIndex(preChildRowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return prollyChildPriDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
|
||||
}
|
||||
empty, err := preChildRowData.Empty()
|
||||
@@ -412,7 +421,10 @@ func childFkConstraintViolations(
|
||||
} else {
|
||||
idx = preChild.IndexData
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return prollyChildSecDiffFkConstraintViolations(ctx, foreignKey, postParent, postChild, m, receiver)
|
||||
}
|
||||
|
||||
|
||||
@@ -38,19 +38,29 @@ func prollyParentSecDiffFkConstraintViolations(
|
||||
postParent, postChild *constraintViolationsLoadedTable,
|
||||
preParentSecIdx prolly.Map,
|
||||
receiver FKViolationReceiver) error {
|
||||
|
||||
postParentRowData := durable.ProllyMapFromIndex(postParent.RowData)
|
||||
postParentSecIdx := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
childSecIdx := durable.ProllyMapFromIndex(postChild.IndexData)
|
||||
postParentRowData, err := durable.ProllyMapFromIndex(postParent.RowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
postParentSecIdx, err := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
childSecIdx, err := durable.ProllyMapFromIndex(postChild.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
parentSecKD, _ := postParentSecIdx.Descriptors()
|
||||
parentPrefixKD := parentSecKD.PrefixDesc(len(foreignKey.TableColumns))
|
||||
partialKB := val.NewTupleBuilder(parentPrefixKD)
|
||||
|
||||
childPriIdx := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
childPriIdx, err := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
childPriKD, _ := childPriIdx.Descriptors()
|
||||
|
||||
var err error
|
||||
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
|
||||
considerAllRowsModified := false
|
||||
err = prolly.DiffMaps(ctx, preParentSecIdx, postParentSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
@@ -95,20 +105,32 @@ func prollyParentPriDiffFkConstraintViolations(
|
||||
postParent, postChild *constraintViolationsLoadedTable,
|
||||
preParentRowData prolly.Map,
|
||||
receiver FKViolationReceiver) error {
|
||||
postParentRowData := durable.ProllyMapFromIndex(postParent.RowData)
|
||||
postParentIndexData := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
postParentRowData, err := durable.ProllyMapFromIndex(postParent.RowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
postParentIndexData, err := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
idxDesc, _ := postParentIndexData.Descriptors()
|
||||
partialDesc := idxDesc.PrefixDesc(len(foreignKey.TableColumns))
|
||||
partialKB := val.NewTupleBuilder(partialDesc)
|
||||
|
||||
childPriIdx := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
childScndryIdx := durable.ProllyMapFromIndex(postChild.IndexData)
|
||||
childPriIdx, err := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
childScndryIdx, err := durable.ProllyMapFromIndex(postChild.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
primaryKD, _ := childPriIdx.Descriptors()
|
||||
|
||||
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
|
||||
considerAllRowsModified := false
|
||||
err := prolly.DiffMaps(ctx, preParentRowData, postParentRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
err = prolly.DiffMaps(ctx, preParentRowData, postParentRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
switch diff.Type {
|
||||
case tree.RemovedDiff, tree.ModifiedDiff:
|
||||
partialKey, hadNulls := makePartialKey(partialKB, foreignKey.ReferencedTableColumns, postParent.Index, postParent.Schema, val.Tuple(diff.Key), val.Tuple(diff.From), preParentRowData.Pool())
|
||||
@@ -159,8 +181,14 @@ func prollyChildPriDiffFkConstraintViolations(
|
||||
postParent, postChild *constraintViolationsLoadedTable,
|
||||
preChildRowData prolly.Map,
|
||||
receiver FKViolationReceiver) error {
|
||||
postChildRowData := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
parentScndryIdx := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
postChildRowData, err := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parentScndryIdx, err := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
idxDesc, _ := parentScndryIdx.Descriptors()
|
||||
partialDesc := idxDesc.PrefixDesc(len(foreignKey.TableColumns))
|
||||
@@ -168,7 +196,7 @@ func prollyChildPriDiffFkConstraintViolations(
|
||||
|
||||
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
|
||||
considerAllRowsModified := false
|
||||
err := prolly.DiffMaps(ctx, preChildRowData, postChildRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
err = prolly.DiffMaps(ctx, preChildRowData, postChildRowData, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
switch diff.Type {
|
||||
case tree.AddedDiff, tree.ModifiedDiff:
|
||||
k, v := val.Tuple(diff.Key), val.Tuple(diff.To)
|
||||
@@ -207,9 +235,18 @@ func prollyChildSecDiffFkConstraintViolations(
|
||||
postParent, postChild *constraintViolationsLoadedTable,
|
||||
preChildSecIdx prolly.Map,
|
||||
receiver FKViolationReceiver) error {
|
||||
postChildRowData := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
postChildSecIdx := durable.ProllyMapFromIndex(postChild.IndexData)
|
||||
parentSecIdx := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
postChildRowData, err := durable.ProllyMapFromIndex(postChild.RowData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
postChildSecIdx, err := durable.ProllyMapFromIndex(postChild.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parentSecIdx, err := durable.ProllyMapFromIndex(postParent.IndexData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
parentSecIdxDesc, _ := parentSecIdx.Descriptors()
|
||||
prefixDesc := parentSecIdxDesc.PrefixDesc(len(foreignKey.TableColumns))
|
||||
@@ -218,7 +255,7 @@ func prollyChildSecDiffFkConstraintViolations(
|
||||
|
||||
// TODO: Determine whether we should surface every row as a diff when the map's value descriptor has changed.
|
||||
considerAllRowsModified := false
|
||||
err := prolly.DiffMaps(ctx, preChildSecIdx, postChildSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
err = prolly.DiffMaps(ctx, preChildSecIdx, postChildSecIdx, considerAllRowsModified, func(ctx context.Context, diff tree.Diff) error {
|
||||
switch diff.Type {
|
||||
case tree.AddedDiff, tree.ModifiedDiff:
|
||||
k := val.Tuple(diff.Key)
|
||||
|
||||
@@ -405,7 +405,10 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newParentRows := durable.ProllyMapFromIndex(idx)
|
||||
newParentRows, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
oldParentSet, err := oldParentTbl.GetIndexSet(ctx)
|
||||
if err != nil {
|
||||
@@ -582,7 +585,10 @@ func migrateIndexSet(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newParent := durable.ProllyMapFromIndex(idx)
|
||||
newParent, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newIdx, err := migrateIndex(ctx, def.Schema(), oldParent, old, newParent, ns)
|
||||
if err != nil {
|
||||
|
||||
@@ -205,7 +205,7 @@ func Parse(str string) (DoltRef, error) {
|
||||
}
|
||||
|
||||
if prefix := PrefixForType(StatsRefType); strings.HasPrefix(str, prefix) {
|
||||
return NewStatsRef(str[len(prefix):]), nil
|
||||
return NewStatsRef(), nil
|
||||
}
|
||||
|
||||
if prefix := PrefixForType(TupleRefType); strings.HasPrefix(str, prefix) {
|
||||
|
||||
@@ -20,9 +20,11 @@ type StatsRef struct {
|
||||
|
||||
var _ DoltRef = StatsRef{}
|
||||
|
||||
const statsBranch = "main"
|
||||
|
||||
// NewStatsRef creates a reference to a statistic dataset head.
|
||||
func NewStatsRef(branch string) StatsRef {
|
||||
return StatsRef{branch}
|
||||
func NewStatsRef() StatsRef {
|
||||
return StatsRef{statsBranch}
|
||||
}
|
||||
|
||||
// GetType will return StatsRefType
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
package reliable
|
||||
|
||||
import (
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/remotestorage/internal/circular"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/circular"
|
||||
)
|
||||
|
||||
// A reliable.Chan is a type of channel transformer which can be used to build
|
||||
|
||||
@@ -24,12 +24,12 @@ import (
|
||||
const StatsVersion int64 = 1
|
||||
|
||||
const (
|
||||
StatsQualifierColName = "qualifier"
|
||||
StatsDbColName = "database_name"
|
||||
StatsTableColName = "table_name"
|
||||
StatsIndexColName = "index_name"
|
||||
StatsPositionColName = "position"
|
||||
StatsBranchName = "branch"
|
||||
StatsCommitHashColName = "commit_hash"
|
||||
StatsPrefixLenName = "prefix_len"
|
||||
StatsRowCountColName = "row_count"
|
||||
StatsDistinctCountColName = "distinct_count"
|
||||
StatsNullCountColName = "null_count"
|
||||
@@ -42,7 +42,7 @@ const (
|
||||
StatsMcv2ColName = "mcv2"
|
||||
StatsMcv3ColName = "mcv3"
|
||||
StatsMcv4ColName = "mcv4"
|
||||
StatsMcvCountsColName = "mcvCounts"
|
||||
StatsMcvCountsColName = "mcv_counts"
|
||||
StatsVersionColName = "version"
|
||||
)
|
||||
|
||||
@@ -52,6 +52,7 @@ const (
|
||||
StatsIndexTag
|
||||
StatsPositionTag
|
||||
StatsVersionTag
|
||||
StatsPrefixLenTag
|
||||
StatsCommitHashTag
|
||||
StatsRowCountTag
|
||||
StatsDistinctCountTag
|
||||
@@ -71,9 +72,9 @@ const (
|
||||
func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
|
||||
return sql.PrimaryKeySchema{
|
||||
Schema: sql.Schema{
|
||||
&sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName},
|
||||
@@ -88,7 +89,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
|
||||
&sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName},
|
||||
&sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName},
|
||||
},
|
||||
PkOrdinals: []int{0, 1},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,20 +96,14 @@ var StatsTableDoltSchema = StatsTableDoltSchemaGen()
|
||||
|
||||
func StatsTableDoltSchemaGen() Schema {
|
||||
colColl := NewColCollection(
|
||||
NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}),
|
||||
NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}),
|
||||
NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}),
|
||||
NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}),
|
||||
NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}),
|
||||
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}),
|
||||
NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}),
|
||||
NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false),
|
||||
NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false),
|
||||
NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false),
|
||||
|
||||
@@ -377,10 +377,16 @@ func (b *binlogProducer) createRowEvents(ctx *sql.Context, tableDeltas []diff.Ta
|
||||
|
||||
var fromMap, toMap prolly.Map
|
||||
if fromRowData != nil {
|
||||
fromMap = durable.ProllyMapFromIndex(fromRowData)
|
||||
fromMap, err = durable.ProllyMapFromIndex(fromRowData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if toRowData != nil {
|
||||
toMap = durable.ProllyMapFromIndex(toRowData)
|
||||
toMap, err = durable.ProllyMapFromIndex(toRowData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
sch, err := tableDelta.ToTable.GetSchema(ctx)
|
||||
|
||||
@@ -162,6 +162,10 @@ func (db database) RequestedName() string {
|
||||
return db.Name()
|
||||
}
|
||||
|
||||
func (db database) AliasedName() string {
|
||||
return db.Name()
|
||||
}
|
||||
|
||||
type noopRepoStateWriter struct{}
|
||||
|
||||
var _ env.RepoStateWriter = noopRepoStateWriter{}
|
||||
|
||||
@@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
if branch == "" {
|
||||
branch = db.Revision()
|
||||
}
|
||||
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true
|
||||
case doltdb.ProceduresTableName:
|
||||
found = true
|
||||
|
||||
@@ -985,7 +985,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua
|
||||
}
|
||||
}
|
||||
|
||||
db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
|
||||
db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
|
||||
// preserve original user case in the case of not found
|
||||
if sql.ErrDatabaseNotFound.Is(err) {
|
||||
return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName)
|
||||
@@ -1526,8 +1526,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
// revisionDbForBranch returns a new database that is tied to the branch named by revSpec
|
||||
func revisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
|
||||
// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec
|
||||
func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
|
||||
static := staticRepoState{
|
||||
branch: ref.NewBranchRef(revSpec),
|
||||
RepoStateWriter: srcDb.DbData().Rsw,
|
||||
|
||||
@@ -73,7 +73,11 @@ func getProllyRowMaps(ctx *sql.Context, vrw types.ValueReadWriter, ns tree.NodeS
|
||||
return prolly.Map{}, err
|
||||
}
|
||||
|
||||
return durable.ProllyMapFromIndex(idx), nil
|
||||
pm, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return prolly.Map{}, err
|
||||
}
|
||||
return pm, nil
|
||||
}
|
||||
|
||||
func resolveProllyConflicts(ctx *sql.Context, tbl *doltdb.Table, tblName string, ourSch, sch schema.Schema) (*doltdb.Table, error) {
|
||||
@@ -94,7 +98,10 @@ func resolveProllyConflicts(ctx *sql.Context, tbl *doltdb.Table, tblName string,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ourMap := durable.ProllyMapFromIndex(ourIdx)
|
||||
ourMap, err := durable.ProllyMapFromIndex(ourIdx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mutMap := ourMap.Mutate()
|
||||
|
||||
// get mutable secondary indexes
|
||||
|
||||
@@ -47,12 +47,15 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{
|
||||
{Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag},
|
||||
{Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints},
|
||||
|
||||
{Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)},
|
||||
{Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)},
|
||||
{Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)},
|
||||
{Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)},
|
||||
{Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)},
|
||||
{Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)},
|
||||
{Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)},
|
||||
{Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)},
|
||||
{Name: "dolt_stats_flush", Schema: statsFuncSchema, Function: statsFunc(statsFlush)},
|
||||
{Name: "dolt_stats_once", Schema: statsFuncSchema, Function: statsFunc(statsOnce)},
|
||||
{Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)},
|
||||
{Name: "dolt_stats_timers", Schema: statsFuncSchema, Function: statsFunc(statsTimers)},
|
||||
}
|
||||
|
||||
// stringSchema returns a non-nullable schema with all columns as LONGTEXT.
|
||||
|
||||
@@ -15,14 +15,14 @@
|
||||
package dprocedures
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"strconv"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
)
|
||||
|
||||
@@ -34,9 +34,16 @@ var statsFuncSchema = []*sql.Column{
|
||||
},
|
||||
}
|
||||
|
||||
func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
|
||||
return func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
|
||||
res, err := fn(ctx)
|
||||
const OkResult = "Ok"
|
||||
|
||||
func statsFunc(fn func(ctx *sql.Context, args ...string) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
|
||||
return func(ctx *sql.Context, args ...string) (iter sql.RowIter, err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = fmt.Errorf("stats function unexpectedly panicked: %s", r)
|
||||
}
|
||||
}()
|
||||
res, err := fn(ctx, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -44,124 +51,211 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con
|
||||
}
|
||||
}
|
||||
|
||||
// AutoRefreshStatsProvider is a sql.StatsProvider that exposes hooks for
|
||||
// StatsInfo gives a summary of the current coordinator stats.
|
||||
type StatsInfo struct {
|
||||
DbCnt int `json:"dbCnt"`
|
||||
Active bool `json:"active"`
|
||||
StorageBucketCnt int `json:"storageBucketCnt"`
|
||||
CachedBucketCnt int `json:"cachedBucketCnt"`
|
||||
CachedBoundCnt int `json:"cachedBoundCnt"`
|
||||
CachedTemplateCnt int `json:"cachedTemplateCnt"`
|
||||
StatCnt int `json:"statCnt"`
|
||||
GcCnt int `json:"gcCnt,omitempty"`
|
||||
GenCnt int `json:"genCnt,omitempty"`
|
||||
Backing string `json:"backing"`
|
||||
}
|
||||
|
||||
// ToJson returns stats info as a json string. Use the |short|
|
||||
// flag to exclude cycle counters.
|
||||
func (si StatsInfo) ToJson(short bool) string {
|
||||
if short {
|
||||
si.GcCnt = 0
|
||||
si.GenCnt = 0
|
||||
}
|
||||
jsonData, err := json.Marshal(si)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return string(jsonData)
|
||||
}
|
||||
|
||||
// ExtendedStatsProvider is a sql.StatsProvider that exposes hooks for
|
||||
// observing and manipulating background database auto refresh threads.
|
||||
type AutoRefreshStatsProvider interface {
|
||||
type ExtendedStatsProvider interface {
|
||||
sql.StatsProvider
|
||||
CancelRefreshThread(string)
|
||||
StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error
|
||||
ThreadStatus(string) string
|
||||
Prune(ctx *sql.Context) error
|
||||
// Restart starts a new stats thread, finalizes any active thread
|
||||
Restart() error
|
||||
// Stop finalizes stats thread if active
|
||||
Stop()
|
||||
// Info returns summary statistics about the current coordinator state
|
||||
Info(ctx context.Context) (StatsInfo, error)
|
||||
// Purge wipes the memory and storage state, and pauses stats collection
|
||||
Purge(ctx *sql.Context) error
|
||||
// WaitForSync blocks until the stats state includes changes
|
||||
// from the current session
|
||||
WaitForSync(ctx context.Context) error
|
||||
// Gc forces the next stats cycle to perform a GC. Block until
|
||||
// the GC lands.
|
||||
Gc(ctx *sql.Context) error
|
||||
// WaitForFlush blocks until the next cycle finishes and flushes
|
||||
// buckets to disk.
|
||||
WaitForFlush(ctx *sql.Context) error
|
||||
// CollectOnce performs a stats update in-thread. This will contend
|
||||
// with background collection and most useful in a non-server context.
|
||||
CollectOnce(ctx context.Context) (string, error)
|
||||
// SetTimers is an access point for editing the statistics
|
||||
// delay timer. This will block if the scheduler is not running.
|
||||
SetTimers(int64, int64)
|
||||
}
|
||||
|
||||
type BranchStatsProvider interface {
|
||||
DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error
|
||||
}
|
||||
|
||||
// statsRestart tries to stop and then start a refresh thread
|
||||
func statsRestart(ctx *sql.Context) (interface{}, error) {
|
||||
// statsRestart cancels any ongoing update thread and starts a new worker
|
||||
func statsRestart(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
statsPro := dSess.StatsProvider()
|
||||
dbName := strings.ToLower(ctx.GetCurrentDatabase())
|
||||
|
||||
if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
|
||||
pro := dSess.Provider()
|
||||
newFs, err := pro.FileSystemForDatabase(dbName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to restart stats collection: %w", err)
|
||||
if afp, ok := statsPro.(ExtendedStatsProvider); ok {
|
||||
if err := afp.Restart(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO")
|
||||
|
||||
sqlDb, ok := pro.BaseDatabase(ctx, dbName)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName)
|
||||
}
|
||||
|
||||
afp.CancelRefreshThread(dbName)
|
||||
|
||||
err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to restart collection: %w", err)
|
||||
}
|
||||
return fmt.Sprintf("restarted stats collection: %s", ref.StatsRef{}.String()), nil
|
||||
return OkResult, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsStatus returns the last update for a stats thread
|
||||
func statsStatus(ctx *sql.Context) (interface{}, error) {
|
||||
// statsInfo returns a coordinator state summary
|
||||
func statsInfo(ctx *sql.Context, args ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
dbName := strings.ToLower(ctx.GetCurrentDatabase())
|
||||
pro := dSess.StatsProvider()
|
||||
if afp, ok := pro.(AutoRefreshStatsProvider); ok {
|
||||
return afp.ThreadStatus(dbName), nil
|
||||
if afp, ok := pro.(ExtendedStatsProvider); ok {
|
||||
var short bool
|
||||
if len(args) > 0 && (args[0] == "-s" || args[0] == "--short") {
|
||||
short = true
|
||||
}
|
||||
info, err := afp.Info(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return info.ToJson(short), nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsStop cancels a refresh thread
|
||||
func statsStop(ctx *sql.Context) (interface{}, error) {
|
||||
// statsWait blocks until the stats worker executes two full loops
|
||||
// of instructions. The second loop will include the most recent
|
||||
// committed session as of this function's execution.
|
||||
func statsWait(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro := dSess.StatsProvider()
|
||||
if afp, ok := pro.(ExtendedStatsProvider); ok {
|
||||
if err := afp.WaitForSync(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return OkResult, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsOnce runs a one-off worker update. This is mostly used for
|
||||
// testing and grabbing statistics while in the shell. Servers
|
||||
// should use `dolt_stats_wait` to avoid contending with the
|
||||
// background thread.
|
||||
func statsOnce(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro := dSess.StatsProvider()
|
||||
if afp, ok := pro.(ExtendedStatsProvider); ok {
|
||||
str, err := afp.CollectOnce(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return str, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsFlush waits for the next stats flush to storage.
|
||||
func statsFlush(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro := dSess.StatsProvider()
|
||||
if afp, ok := pro.(ExtendedStatsProvider); ok {
|
||||
if err := afp.WaitForFlush(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return OkResult, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsGc sets the |doGc| flag and waits until a worker
|
||||
// performs an update/GC.
|
||||
func statsGc(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro := dSess.StatsProvider()
|
||||
if afp, ok := pro.(ExtendedStatsProvider); ok {
|
||||
if err := afp.Gc(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return OkResult, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsStop flushes the job queue and leaves the stats provider
|
||||
// in a paused state.
|
||||
func statsStop(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
statsPro := dSess.StatsProvider()
|
||||
dbName := strings.ToLower(ctx.GetCurrentDatabase())
|
||||
|
||||
if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
|
||||
afp.CancelRefreshThread(dbName)
|
||||
return fmt.Sprintf("stopped thread: %s", dbName), nil
|
||||
if afp, ok := statsPro.(ExtendedStatsProvider); ok {
|
||||
afp.Stop()
|
||||
return OkResult, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
// statsDrop deletes the stats ref
|
||||
func statsDrop(ctx *sql.Context) (interface{}, error) {
|
||||
// statsPurge flushes the job queue, deletes the current caches
|
||||
// and storage targets, re-initializes the tracked database
|
||||
// states, and returns with stats collection paused.
|
||||
func statsPurge(ctx *sql.Context, _ ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro := dSess.StatsProvider()
|
||||
dbName := strings.ToLower(ctx.GetCurrentDatabase())
|
||||
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to drop stats: %w", err)
|
||||
}
|
||||
|
||||
if afp, ok := pro.(AutoRefreshStatsProvider); ok {
|
||||
// currently unsafe to drop stats while running refresh
|
||||
afp.CancelRefreshThread(dbName)
|
||||
}
|
||||
if bsp, ok := pro.(BranchStatsProvider); ok {
|
||||
err := bsp.DropBranchDbStats(ctx, branch, dbName, true)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to drop stats: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Sprintf("deleted stats ref for %s", dbName), nil
|
||||
}
|
||||
|
||||
// statsPrune replaces the current disk contents with only the currently
|
||||
// tracked in memory statistics.
|
||||
func statsPrune(ctx *sql.Context) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
|
||||
pro, ok := dSess.StatsProvider().(ExtendedStatsProvider)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("stats not persisted, cannot purge")
|
||||
}
|
||||
if err := pro.Prune(ctx); err != nil {
|
||||
return "failed to prune stats databases", err
|
||||
}
|
||||
return "pruned all stats databases", nil
|
||||
}
|
||||
|
||||
// statsPurge removes the stats database from disk
|
||||
func statsPurge(ctx *sql.Context) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("stats not persisted, cannot purge")
|
||||
}
|
||||
pro.Stop()
|
||||
|
||||
if err := pro.Purge(ctx); err != nil {
|
||||
return "failed to purged databases", err
|
||||
return "failed to purge stats", err
|
||||
}
|
||||
return "purged all database stats", nil
|
||||
|
||||
return OkResult, nil
|
||||
}
|
||||
|
||||
// statsTimers updates the stats timers, which go into effect immediately.
|
||||
func statsTimers(ctx *sql.Context, args ...string) (interface{}, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
statsPro := dSess.StatsProvider()
|
||||
|
||||
if len(args) != 2 {
|
||||
return nil, fmt.Errorf("expected timer arguments (ns): (job, gc)")
|
||||
}
|
||||
job, err := strconv.ParseInt(args[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("interval timer must be positive intergers")
|
||||
}
|
||||
gc, err := strconv.ParseInt(args[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("interval timer must be positive intergers")
|
||||
}
|
||||
|
||||
if afp, ok := statsPro.(ExtendedStatsProvider); ok {
|
||||
afp.SetTimers(job, gc)
|
||||
return OkResult, nil
|
||||
}
|
||||
return nil, fmt.Errorf("provider does not implement ExtendedStatsProvider")
|
||||
}
|
||||
|
||||
@@ -327,7 +327,10 @@ func (a *AutoIncrementTracker) deepSet(ctx *sql.Context, tableName string, table
|
||||
|
||||
func getMaxIndexValue(ctx context.Context, indexData durable.Index) (uint64, error) {
|
||||
if types.IsFormat_DOLT(indexData.Format()) {
|
||||
idx := durable.ProllyMapFromIndex(indexData)
|
||||
idx, err := durable.ProllyMapFromIndex(indexData)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
iter, err := idx.IterAllReverse(ctx)
|
||||
if err != nil {
|
||||
|
||||
@@ -122,6 +122,7 @@ type SqlDatabase interface {
|
||||
sql.Database
|
||||
sql.SchemaDatabase
|
||||
sql.DatabaseSchema
|
||||
sql.AliasedDatabase
|
||||
SessionDatabase
|
||||
RevisionDatabase
|
||||
|
||||
|
||||
@@ -59,12 +59,13 @@ const (
|
||||
DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch"
|
||||
DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs"
|
||||
|
||||
DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled"
|
||||
DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled"
|
||||
DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold"
|
||||
DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval"
|
||||
DoltStatsMemoryOnly = "dolt_stats_memory_only"
|
||||
DoltStatsBranches = "dolt_stats_branches"
|
||||
DoltStatsEnabled = "dolt_stats_enabled"
|
||||
DoltStatsPaused = "dolt_stats_paused"
|
||||
DoltStatsMemoryOnly = "dolt_stats_memory_only"
|
||||
DoltStatsBranches = "dolt_stats_branches"
|
||||
DoltStatsJobInterval = "dolt_stats_job_interval"
|
||||
DoltStatsGCInterval = "dolt_stats_gc_interval"
|
||||
DoltStatsGCEnabled = "dolt_stats_gc_enabled"
|
||||
)
|
||||
|
||||
const URLTemplateDatabasePlaceholder = "{database}"
|
||||
|
||||
@@ -154,7 +154,10 @@ func newProllyConflictRowIter(ctx *sql.Context, ct ProllyConflictsTable) (*proll
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ourRows := durable.ProllyMapFromIndex(idx)
|
||||
ourRows, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
itr, err := ct.artM.IterAllConflicts(ctx)
|
||||
if err != nil {
|
||||
@@ -424,7 +427,11 @@ func (itr *prollyConflictRowIter) loadTableMaps(ctx *sql.Context, baseHash, thei
|
||||
return err
|
||||
}
|
||||
|
||||
itr.baseRows = durable.ProllyMapFromIndex(idx)
|
||||
itr.baseRows, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
itr.baseHash = baseHash
|
||||
}
|
||||
|
||||
@@ -446,7 +453,10 @@ func (itr *prollyConflictRowIter) loadTableMaps(ctx *sql.Context, baseHash, thei
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
itr.theirRows = durable.ProllyMapFromIndex(idx)
|
||||
itr.theirRows, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
itr.theirHash = theirHash
|
||||
}
|
||||
|
||||
|
||||
@@ -251,7 +251,10 @@ func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, tar
|
||||
if err != nil {
|
||||
return prollyDiffIter{}, err
|
||||
}
|
||||
from = durable.ProllyMapFromIndex(idx)
|
||||
from, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return prollyDiffIter{}, err
|
||||
}
|
||||
if fsch, err = dp.from.GetSchema(ctx); err != nil {
|
||||
return prollyDiffIter{}, err
|
||||
}
|
||||
@@ -263,7 +266,10 @@ func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, tar
|
||||
if err != nil {
|
||||
return prollyDiffIter{}, err
|
||||
}
|
||||
to = durable.ProllyMapFromIndex(idx)
|
||||
to, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return prollyDiffIter{}, err
|
||||
}
|
||||
if tsch, err = dp.to.GetSchema(ctx); err != nil {
|
||||
return prollyDiffIter{}, err
|
||||
}
|
||||
|
||||
@@ -236,7 +236,10 @@ func newQueryCatalogEntryProlly(ctx context.Context, tbl *doltdb.Table, id, name
|
||||
if err != nil {
|
||||
return SavedQuery{}, nil, err
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return SavedQuery{}, nil, err
|
||||
}
|
||||
|
||||
existingSQ, err := retrieveFromQueryCatalogProlly(ctx, tbl, id)
|
||||
if err != nil && !ErrQueryNotFound.Is(err) {
|
||||
@@ -312,7 +315,11 @@ func retrieveFromQueryCatalogProlly(ctx context.Context, tbl *doltdb.Table, id s
|
||||
return SavedQuery{}, err
|
||||
}
|
||||
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return SavedQuery{}, err
|
||||
}
|
||||
|
||||
kb := val.NewTupleBuilder(catalogKd)
|
||||
kb.PutString(0, id)
|
||||
k := kb.Build(m.Pool())
|
||||
|
||||
@@ -68,7 +68,7 @@ func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) {
|
||||
}
|
||||
|
||||
type BranchStatsProvider interface {
|
||||
GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error)
|
||||
GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error)
|
||||
}
|
||||
|
||||
// RowCount implements sql.StatisticsTable
|
||||
@@ -119,14 +119,19 @@ func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) {
|
||||
// PartitionRows is a sql.Table interface function that gets a row iterator for a partition
|
||||
func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
statsPro := dSess.StatsProvider().(BranchStatsProvider)
|
||||
statsPro, ok := dSess.StatsProvider().(BranchStatsProvider)
|
||||
if !ok {
|
||||
return sql.RowsToRowIter(), nil
|
||||
}
|
||||
var dStats []sql.Statistic
|
||||
for _, table := range st.tableNames {
|
||||
dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, st.schemaName, table)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dStats = append(dStats, dbStats...)
|
||||
for _, s := range dbStats {
|
||||
dStats = append(dStats, s)
|
||||
}
|
||||
}
|
||||
return stats.NewStatsIter(ctx, dStats...)
|
||||
}
|
||||
|
||||
@@ -825,7 +825,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif
|
||||
if err != nil {
|
||||
return workspaceDiffIter{}, err
|
||||
}
|
||||
base = durable.ProllyMapFromIndex(idx)
|
||||
base, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return workspaceDiffIter{}, err
|
||||
}
|
||||
}
|
||||
|
||||
if wp.staging != nil {
|
||||
@@ -833,7 +836,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif
|
||||
if err != nil {
|
||||
return workspaceDiffIter{}, err
|
||||
}
|
||||
staging = durable.ProllyMapFromIndex(idx)
|
||||
staging, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return workspaceDiffIter{}, err
|
||||
}
|
||||
}
|
||||
|
||||
if wp.working != nil {
|
||||
@@ -841,7 +847,10 @@ func newWorkspaceDiffIter(ctx *sql.Context, wp WorkspacePartition) (workspaceDif
|
||||
if err != nil {
|
||||
return workspaceDiffIter{}, err
|
||||
}
|
||||
working = durable.ProllyMapFromIndex(idx)
|
||||
working, err = durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return workspaceDiffIter{}, err
|
||||
}
|
||||
}
|
||||
|
||||
var nodeStore tree.NodeStore
|
||||
|
||||
@@ -392,16 +392,12 @@ func TestQueryPlans(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestIntegrationQueryPlans(t *testing.T) {
|
||||
harness := newDoltEnginetestHarness(t).WithConfigureStats(true)
|
||||
harness := newDoltEnginetestHarness(t)
|
||||
defer harness.Close()
|
||||
enginetest.TestIntegrationPlans(t, harness)
|
||||
}
|
||||
|
||||
func TestDoltDiffQueryPlans(t *testing.T) {
|
||||
if !types.IsFormat_DOLT(types.Format_Default) {
|
||||
t.Skip("only new format support system table indexing")
|
||||
}
|
||||
|
||||
harness := newDoltEnginetestHarness(t).WithParallelism(2) // want Exchange nodes
|
||||
RunDoltDiffQueryPlansTest(t, harness)
|
||||
}
|
||||
@@ -608,7 +604,7 @@ func TestScripts(t *testing.T) {
|
||||
if types.IsFormat_DOLT(types.Format_Default) {
|
||||
skipped = append(skipped, newFormatSkippedScripts...)
|
||||
}
|
||||
h := newDoltHarness(t).WithSkippedQueries(skipped)
|
||||
h := newDoltHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true)
|
||||
defer h.Close()
|
||||
enginetest.TestScripts(t, h)
|
||||
}
|
||||
@@ -685,20 +681,13 @@ func TestDoltUserPrivileges(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestJoinOps(t *testing.T) {
|
||||
if types.IsFormat_LD(types.Format_Default) {
|
||||
t.Skip("DOLT_LD keyless indexes are not sorted")
|
||||
}
|
||||
|
||||
h := newDoltHarness(t)
|
||||
defer h.Close()
|
||||
enginetest.TestJoinOps(t, h, enginetest.DefaultJoinOpTests)
|
||||
}
|
||||
|
||||
func TestJoinPlanning(t *testing.T) {
|
||||
if types.IsFormat_LD(types.Format_Default) {
|
||||
t.Skip("DOLT_LD keyless indexes are not sorted")
|
||||
}
|
||||
h := newDoltEnginetestHarness(t).WithConfigureStats(true)
|
||||
h := newDoltEnginetestHarness(t)
|
||||
defer h.Close()
|
||||
enginetest.TestJoinPlanning(t, h)
|
||||
}
|
||||
@@ -706,7 +695,6 @@ func TestJoinPlanning(t *testing.T) {
|
||||
func TestJoinQueries(t *testing.T) {
|
||||
h := newDoltHarness(t)
|
||||
defer h.Close()
|
||||
enginetest.TestJoinQueries(t, h)
|
||||
}
|
||||
|
||||
func TestJoinQueriesPrepared(t *testing.T) {
|
||||
@@ -1458,11 +1446,6 @@ func TestStatBranchTests(t *testing.T) {
|
||||
RunStatBranchTests(t, harness)
|
||||
}
|
||||
|
||||
func TestStatsFunctions(t *testing.T) {
|
||||
harness := newDoltEnginetestHarness(t)
|
||||
RunStatsFunctionsTest(t, harness)
|
||||
}
|
||||
|
||||
func TestDiffTableFunction(t *testing.T) {
|
||||
harness := newDoltEnginetestHarness(t)
|
||||
RunDiffTableFunctionTests(t, harness)
|
||||
@@ -1669,11 +1652,6 @@ func TestStatsStorage(t *testing.T) {
|
||||
RunStatsStorageTests(t, h)
|
||||
}
|
||||
|
||||
func TestStatsIOWithoutReload(t *testing.T) {
|
||||
h := newDoltEnginetestHarness(t)
|
||||
RunStatsIOTestsWithoutReload(t, h)
|
||||
}
|
||||
|
||||
func TestJoinStats(t *testing.T) {
|
||||
h := newDoltEnginetestHarness(t)
|
||||
RunJoinStatsTests(t, h)
|
||||
@@ -1744,7 +1722,7 @@ func TestScriptsPrepared(t *testing.T) {
|
||||
skipped = append(skipped, newFormatSkippedScripts...)
|
||||
}
|
||||
skipPreparedTests(t)
|
||||
h := newDoltHarness(t).WithSkippedQueries(skipped)
|
||||
h := newDoltHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true)
|
||||
defer h.Close()
|
||||
enginetest.TestScriptsPrepared(t, h)
|
||||
}
|
||||
@@ -1945,6 +1923,10 @@ func TestCreateDatabaseErrorCleansUp(t *testing.T) {
|
||||
// (2) auto refresh threads, and (3) manual ANALYZE statements.
|
||||
// todo: the dolt_stat functions should be concurrency tested
|
||||
func TestStatsAutoRefreshConcurrency(t *testing.T) {
|
||||
if runtime.GOOS == "windows" && os.Getenv("CI") != "" {
|
||||
t.Skip("Racy on Windows CI.")
|
||||
}
|
||||
|
||||
// create engine
|
||||
harness := newDoltHarness(t)
|
||||
harness.Setup(setup.MydbData)
|
||||
@@ -1959,21 +1941,16 @@ func TestStatsAutoRefreshConcurrency(t *testing.T) {
|
||||
|
||||
// Setting an interval of 0 and a threshold of 0 will result
|
||||
// in the stats being updated after every operation
|
||||
intervalSec := time.Duration(0)
|
||||
thresholdf64 := 0.
|
||||
bThreads := sql.NewBackgroundThreads()
|
||||
branches := []string{"main"}
|
||||
statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.Provider)
|
||||
statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.StatsController)
|
||||
|
||||
// it is important to use new sessions for this test, to avoid working root conflicts
|
||||
readCtx := enginetest.NewSession(harness)
|
||||
writeCtx := enginetest.NewSession(harness)
|
||||
refreshCtx := enginetest.NewSession(harness)
|
||||
newCtx := func(context.Context) (*sql.Context, error) {
|
||||
return refreshCtx, nil
|
||||
}
|
||||
|
||||
err := statsProv.InitAutoRefreshWithParams(newCtx, sqlDb.Name(), bThreads, intervalSec, thresholdf64, branches)
|
||||
fs, err := engine.EngineAnalyzer().Catalog.DbProvider.(*sqle.DoltDatabaseProvider).FileSystemForDatabase(sqlDb.AliasedName())
|
||||
require.NoError(t, err)
|
||||
|
||||
err = statsProv.AddFs(readCtx, sqlDb, fs, true)
|
||||
require.NoError(t, err)
|
||||
|
||||
execQ := func(ctx *sql.Context, q string, id int, tag string) {
|
||||
|
||||
@@ -234,41 +234,8 @@ func RunVersionedQueriesTest(t *testing.T, h DoltEnginetestHarness) {
|
||||
}
|
||||
|
||||
func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) {
|
||||
// Dolt supports partial keys, so the index matched is different for some plans
|
||||
// TODO: Fix these differences by implementing partial key matching in the memory tables, or the engine itself
|
||||
skipped := []string{
|
||||
"SELECT pk,pk1,pk2 FROM one_pk LEFT JOIN two_pk ON pk=pk1",
|
||||
"SELECT pk,pk1,pk2 FROM one_pk JOIN two_pk ON pk=pk1",
|
||||
"SELECT one_pk.c5,pk1,pk2 FROM one_pk JOIN two_pk ON pk=pk1 ORDER BY 1,2,3",
|
||||
"SELECT opk.c5,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON opk.pk=tpk.pk1 ORDER BY 1,2,3",
|
||||
"SELECT opk.c5,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON pk=pk1 ORDER BY 1,2,3",
|
||||
"SELECT pk,pk1,pk2 FROM one_pk LEFT JOIN two_pk ON pk=pk1 ORDER BY 1,2,3",
|
||||
"SELECT pk,pk1,pk2 FROM one_pk t1, two_pk t2 WHERE pk=1 AND pk2=1 AND pk1=1 ORDER BY 1,2",
|
||||
}
|
||||
// Parallelism introduces Exchange nodes into the query plans, so disable.
|
||||
// TODO: exchange nodes should really only be part of the explain plan under certain debug settings
|
||||
harness = harness.NewHarness(t).WithSkippedQueries(skipped).WithConfigureStats(true)
|
||||
if !types.IsFormat_DOLT(types.Format_Default) {
|
||||
// only new format supports reverse IndexTableAccess
|
||||
reverseIndexSkip := []string{
|
||||
"SELECT * FROM one_pk ORDER BY pk",
|
||||
"SELECT * FROM two_pk ORDER BY pk1, pk2",
|
||||
"SELECT * FROM two_pk ORDER BY pk1",
|
||||
"SELECT pk1 AS one, pk2 AS two FROM two_pk ORDER BY pk1, pk2",
|
||||
"SELECT pk1 AS one, pk2 AS two FROM two_pk ORDER BY one, two",
|
||||
"SELECT i FROM (SELECT i FROM mytable ORDER BY i DESC LIMIT 1) sq WHERE i = 3",
|
||||
"SELECT i FROM (SELECT i FROM (SELECT i FROM mytable ORDER BY DES LIMIT 1) sql1)sql2 WHERE i = 3",
|
||||
"SELECT s,i FROM mytable order by i DESC",
|
||||
"SELECT s,i FROM mytable as a order by i DESC",
|
||||
"SELECT pk1, pk2 FROM two_pk order by pk1 asc, pk2 asc",
|
||||
"SELECT pk1, pk2 FROM two_pk order by pk1 desc, pk2 desc",
|
||||
"SELECT i FROM (SELECT i FROM (SELECT i FROM mytable ORDER BY i DESC LIMIT 1) sq1) sq2 WHERE i = 3",
|
||||
}
|
||||
harness = harness.WithSkippedQueries(reverseIndexSkip)
|
||||
}
|
||||
|
||||
harness = harness.NewHarness(t)
|
||||
defer harness.Close()
|
||||
sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0)
|
||||
enginetest.TestQueryPlans(t, harness, queries.PlanTests)
|
||||
}
|
||||
|
||||
@@ -1165,21 +1132,6 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) enginetest.QueryEngine {
|
||||
return e
|
||||
}
|
||||
|
||||
func RunStatsFunctionsTest(t *testing.T, harness DoltEnginetestHarness) {
|
||||
defer harness.Close()
|
||||
for _, test := range StatProcTests {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
// reset engine so provider statistics are clean
|
||||
harness = harness.NewHarness(t).WithConfigureStats(true)
|
||||
harness.Setup(setup.MydbData)
|
||||
harness.SkipSetupCommit()
|
||||
e := mustNewEngine(t, harness)
|
||||
defer e.Close()
|
||||
enginetest.TestScriptWithEngine(t, e, harness, test)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func RunDiffTableFunctionTests(t *testing.T, harness DoltEnginetestHarness) {
|
||||
for _, test := range DiffTableFunctionScriptTests {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
@@ -1559,30 +1511,15 @@ func RunStatsHistogramTests(t *testing.T, h DoltEnginetestHarness) {
|
||||
}
|
||||
|
||||
func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
|
||||
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
|
||||
for _, script := range DoltHistogramTests {
|
||||
func() {
|
||||
h = h.NewHarness(t).WithConfigureStats(true)
|
||||
defer h.Close()
|
||||
e := mustNewEngine(t, h)
|
||||
if enginetest.IsServerEngine(e) {
|
||||
return
|
||||
}
|
||||
defer e.Close()
|
||||
TestProviderReloadScriptWithEngine(t, e, h, script)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
|
||||
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
|
||||
func() {
|
||||
h = h.NewHarness(t).WithConfigureStats(true)
|
||||
defer h.Close()
|
||||
e := mustNewEngine(t, h)
|
||||
if enginetest.IsServerEngine(e) {
|
||||
return
|
||||
}
|
||||
defer e.Close()
|
||||
enginetest.TestScriptWithEngine(t, e, h, script)
|
||||
}()
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
gms "github.com/dolthub/go-mysql-server"
|
||||
"github.com/dolthub/go-mysql-server/enginetest"
|
||||
@@ -28,6 +29,7 @@ import (
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/mysql_db"
|
||||
"github.com/dolthub/go-mysql-server/sql/rowexec"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
|
||||
@@ -36,7 +38,6 @@ import (
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
@@ -46,7 +47,7 @@ import (
|
||||
type DoltHarness struct {
|
||||
t *testing.T
|
||||
provider dsess.DoltDatabaseProvider
|
||||
statsPro sql.StatsProvider
|
||||
statsPro *statspro.StatsController
|
||||
multiRepoEnv *env.MultiRepoEnv
|
||||
session *dsess.DoltSession
|
||||
branchControl *branch_control.Controller
|
||||
@@ -59,6 +60,7 @@ type DoltHarness struct {
|
||||
setupDbs map[string]struct{}
|
||||
skipSetupCommit bool
|
||||
configureStats bool
|
||||
statsThreads *sql.BackgroundThreads
|
||||
useLocalFilesystem bool
|
||||
setupTestProcedures bool
|
||||
}
|
||||
@@ -242,12 +244,19 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
|
||||
}
|
||||
doltProvider, ok := pro.(*sqle.DoltDatabaseProvider)
|
||||
require.True(t, ok)
|
||||
|
||||
d.provider = doltProvider
|
||||
|
||||
d.gcSafepointController = dsess.NewGCSafepointController()
|
||||
|
||||
statsProv := statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
|
||||
d.statsPro = statsProv
|
||||
bThreads := sql.NewBackgroundThreads()
|
||||
|
||||
ctxGen := func(ctx context.Context) (*sql.Context, error) {
|
||||
client := sql.Client{Address: "localhost", User: "root"}
|
||||
return sql.NewContext(context.Background(), sql.WithSession(d.newSessionWithClient(client))), nil
|
||||
}
|
||||
statsPro := statspro.NewStatsController(logrus.StandardLogger(), d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase()))
|
||||
d.statsPro = statsPro
|
||||
|
||||
var err error
|
||||
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, d.gcSafepointController)
|
||||
@@ -262,6 +271,7 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
|
||||
|
||||
sqlCtx := enginetest.NewContext(d)
|
||||
databases := pro.AllDatabases(sqlCtx)
|
||||
|
||||
d.setupDbs = make(map[string]struct{})
|
||||
var dbs []string
|
||||
for _, db := range databases {
|
||||
@@ -281,41 +291,45 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
e = e.WithBackgroundThreads(bThreads)
|
||||
|
||||
// xxx: stats threads can't be tied to single test cycle,
|
||||
// this is only OK for enginetests
|
||||
statsThreads := sql.NewBackgroundThreads()
|
||||
if d.configureStats {
|
||||
bThreads := sql.NewBackgroundThreads()
|
||||
e = e.WithBackgroundThreads(bThreads)
|
||||
|
||||
dSess := dsess.DSessFromSess(sqlCtx.Session)
|
||||
dbCache := dSess.DatabaseCache(sqlCtx)
|
||||
|
||||
dsessDbs := make([]dsess.SqlDatabase, len(dbs))
|
||||
for i, dbName := range dbs {
|
||||
dsessDbs[i], _ = dbCache.GetCachedRevisionDb(fmt.Sprintf("%s/main", dbName), dbName)
|
||||
err = statsPro.Init(ctx, doltProvider, ctxGen, statsThreads, databases)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
statsPro.SetTimers(int64(1*time.Nanosecond), int64(1*time.Second))
|
||||
|
||||
ctxFact := func(context.Context) (*sql.Context, error) {
|
||||
sess := d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"})
|
||||
return sql.NewContext(context.Background(), sql.WithSession(sess)), nil
|
||||
}
|
||||
if err = statsProv.Configure(sqlCtx, ctxFact, bThreads, dsessDbs); err != nil {
|
||||
err = statsPro.Restart()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
statsOnlyQueries := filterStatsOnlyQueries(d.setupData)
|
||||
e, err = enginetest.RunSetupScripts(sqlCtx, e, statsOnlyQueries, d.SupportsNativeIndexCreation())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
finalizeStatsAfterSetup := []setup.SetupScript{{"call dolt_stats_wait()"}}
|
||||
e, err = enginetest.RunSetupScripts(sqlCtx, d.engine, finalizeStatsAfterSetup, d.SupportsNativeIndexCreation())
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Reset the mysql DB table to a clean state for this new engine
|
||||
ctx := enginetest.NewContext(d)
|
||||
|
||||
d.engine.Analyzer.Catalog.MySQLDb = mysql_db.CreateEmptyMySQLDb()
|
||||
d.engine.Analyzer.Catalog.MySQLDb.AddRootAccount()
|
||||
d.engine.Analyzer.Catalog.StatsProvider = statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
|
||||
|
||||
var err error
|
||||
sqlCtx := enginetest.NewContext(d)
|
||||
e, err := enginetest.RunSetupScripts(sqlCtx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
|
||||
e, err := enginetest.RunSetupScripts(ctx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get a fresh session after running setup scripts, since some setup scripts can change the session state
|
||||
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
|
||||
@@ -430,7 +444,6 @@ func (d *DoltHarness) NewDatabases(names ...string) []sql.Database {
|
||||
doltProvider, ok := pro.(*sqle.DoltDatabaseProvider)
|
||||
require.True(d.t, ok)
|
||||
d.provider = doltProvider
|
||||
d.statsPro = statspro.NewProvider(doltProvider, statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
|
||||
|
||||
var err error
|
||||
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), doltProvider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
|
||||
@@ -502,7 +515,6 @@ func (d *DoltHarness) NewDatabaseProvider() sql.MutableDatabaseProvider {
|
||||
|
||||
func (d *DoltHarness) Close() {
|
||||
d.closeProvider()
|
||||
sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, int8(0))
|
||||
}
|
||||
|
||||
func (d *DoltHarness) closeProvider() {
|
||||
|
||||
@@ -17,159 +17,156 @@ package enginetest
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
gms "github.com/dolthub/go-mysql-server"
|
||||
"github.com/dolthub/go-mysql-server/enginetest"
|
||||
"github.com/dolthub/go-mysql-server/enginetest/queries"
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/types"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
)
|
||||
|
||||
// fillerVarchar pushes the tree into level 3
|
||||
var fillerVarchar = strings.Repeat("x", 500)
|
||||
|
||||
var DoltHistogramTests = []queries.ScriptTest{
|
||||
{
|
||||
Name: "mcv checking",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and column_name = 'y,z'",
|
||||
Expected: []sql.Row{
|
||||
{types.JSONDocument{Val: []interface{}{
|
||||
float64(4),
|
||||
}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'",
|
||||
Expected: []sql.Row{
|
||||
{types.JSONDocument{Val: []interface{}{
|
||||
[]interface{}{float64(0), "a"},
|
||||
}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'",
|
||||
Expected: []sql.Row{
|
||||
{2, "a"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "int pk",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y varchar(500));",
|
||||
fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'",
|
||||
Expected: []sql.Row{{32}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
Expected: []sql.Row{{float64(30000)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
Expected: []sql.Row{{float64(0)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
Expected: []sql.Row{{float64(30000)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
Expected: []sql.Row{{int64(1)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "nulls distinct across chunk boundary",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
|
||||
fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar),
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
|
||||
Expected: []sql.Row{{2}},
|
||||
},
|
||||
{
|
||||
// bucket boundary duplication
|
||||
Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'",
|
||||
Expected: []sql.Row{{202}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{float64(400)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{float64(200)}},
|
||||
},
|
||||
{
|
||||
// chunk border double count
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{float64(202)}},
|
||||
},
|
||||
{
|
||||
// max bound count is an all nulls chunk
|
||||
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{int64(183)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "int index",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
|
||||
fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
|
||||
Expected: []sql.Row{{152}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{float64(30000)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{float64(10000)}},
|
||||
},
|
||||
{
|
||||
// border NULL double count
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{float64(20036)}},
|
||||
},
|
||||
{
|
||||
// max bound count is nulls chunk
|
||||
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
Expected: []sql.Row{{int64(440)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
//{
|
||||
// Name: "mcv checking",
|
||||
// SetUpScript: []string{
|
||||
// "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
// "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
|
||||
// "analyze table xy",
|
||||
// },
|
||||
// Assertions: []queries.ScriptTestAssertion{
|
||||
// {
|
||||
// Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and column_name = 'y,z'",
|
||||
// Expected: []sql.Row{
|
||||
// {types.JSONDocument{Val: []interface{}{
|
||||
// float64(4),
|
||||
// }}},
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'",
|
||||
// Expected: []sql.Row{
|
||||
// {types.JSONDocument{Val: []interface{}{
|
||||
// []interface{}{float64(0), "a"},
|
||||
// }}},
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'",
|
||||
// Expected: []sql.Row{
|
||||
// {2, "a"},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
//},
|
||||
//{
|
||||
// Name: "int pk",
|
||||
// SetUpScript: []string{
|
||||
// "CREATE table xy (x bigint primary key, y varchar(500));",
|
||||
// fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
|
||||
// fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
|
||||
// fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
|
||||
// "analyze table xy",
|
||||
// },
|
||||
// Assertions: []queries.ScriptTestAssertion{
|
||||
// {
|
||||
// Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'",
|
||||
// Expected: []sql.Row{{32}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
// Expected: []sql.Row{{float64(30000)}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
// Expected: []sql.Row{{float64(0)}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
// Expected: []sql.Row{{float64(30000)}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'",
|
||||
// Expected: []sql.Row{{int64(1)}},
|
||||
// },
|
||||
// },
|
||||
//},
|
||||
//{
|
||||
// Name: "nulls distinct across chunk boundary",
|
||||
// SetUpScript: []string{
|
||||
// "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
|
||||
// fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar),
|
||||
// fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar),
|
||||
// "analyze table xy",
|
||||
// },
|
||||
// Assertions: []queries.ScriptTestAssertion{
|
||||
// {
|
||||
// Query: "call dolt_stats_wait()",
|
||||
// },
|
||||
// {
|
||||
// Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
|
||||
// Expected: []sql.Row{{2}},
|
||||
// },
|
||||
// {
|
||||
// // bucket boundary duplication
|
||||
// Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'",
|
||||
// Expected: []sql.Row{{202}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{float64(400)}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{float64(200)}},
|
||||
// },
|
||||
// {
|
||||
// // chunk border double count
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{float64(202)}},
|
||||
// },
|
||||
// {
|
||||
// // max bound count is an all nulls chunk
|
||||
// Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{int64(183)}},
|
||||
// },
|
||||
// },
|
||||
//},
|
||||
//{
|
||||
// Name: "int index",
|
||||
// SetUpScript: []string{
|
||||
// "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
|
||||
// fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
|
||||
// fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
|
||||
// fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
|
||||
// "analyze table xy",
|
||||
// },
|
||||
// Assertions: []queries.ScriptTestAssertion{
|
||||
// {
|
||||
// Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
|
||||
// Expected: []sql.Row{{152}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{float64(30000)}},
|
||||
// },
|
||||
// {
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{float64(10000)}},
|
||||
// },
|
||||
// {
|
||||
// // border NULL double count
|
||||
// Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{float64(20036)}},
|
||||
// },
|
||||
// {
|
||||
// // max bound count is nulls chunk
|
||||
// Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'",
|
||||
// Expected: []sql.Row{{int64(440)}},
|
||||
// },
|
||||
// },
|
||||
//},
|
||||
{
|
||||
Name: "multiint index",
|
||||
SetUpScript: []string{
|
||||
@@ -177,9 +174,11 @@ var DoltHistogramTests = []queries.ScriptTest{
|
||||
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'",
|
||||
Expected: []sql.Row{{155}},
|
||||
@@ -203,6 +202,41 @@ var DoltHistogramTests = []queries.ScriptTest{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "multiint index small",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));",
|
||||
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 2) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 3 union select x+1 from inputs where x < 4) select * from inputs) dt", fillerVarchar),
|
||||
fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 5 union select x+1 from inputs where x < 6) select * from inputs) dt", fillerVarchar),
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'",
|
||||
Expected: []sql.Row{{1}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
|
||||
Expected: []sql.Row{{float64(6)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
|
||||
Expected: []sql.Row{{float64(2)}},
|
||||
},
|
||||
{
|
||||
Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
|
||||
Expected: []sql.Row{{float64(6)}},
|
||||
},
|
||||
{
|
||||
// max bound count is nulls chunk
|
||||
Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x,z'",
|
||||
Expected: []sql.Row{{int64(1)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "several int index",
|
||||
SetUpScript: []string{
|
||||
@@ -211,7 +245,10 @@ var DoltHistogramTests = []queries.ScriptTest{
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: " SELECT column_name from information_schema.column_statistics",
|
||||
Query: "call dolt_stats_purge()",
|
||||
},
|
||||
{
|
||||
Query: "SELECT column_name from information_schema.column_statistics",
|
||||
Expected: []sql.Row{},
|
||||
},
|
||||
{
|
||||
@@ -535,8 +572,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
|
||||
{
|
||||
Name: "incremental stats deletes auto",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
|
||||
"analyze table xy",
|
||||
@@ -550,10 +585,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
|
||||
Query: "delete from xy where x > 500",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
Query: "analyze table xy",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics group by table_name, index_name",
|
||||
@@ -565,8 +597,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
|
||||
// https://github.com/dolthub/dolt/issues/8504
|
||||
Name: "alter index column type",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"CREATE table xy (x bigint primary key, y varchar(16))",
|
||||
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
|
||||
"analyze table xy",
|
||||
@@ -594,78 +624,9 @@ var DoltStatsStorageTests = []queries.ScriptTest{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "differentiate table cases",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"set @@PERSIST.dolt_stats_branches ='main'",
|
||||
"CREATE table XY (x bigint primary key, y varchar(16))",
|
||||
"insert into XY values (0,'0'), (1,'1'), (2,'2')",
|
||||
"analyze table XY",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select table_name, upper_bound from dolt_statistics",
|
||||
Expected: []sql.Row{{"xy", "2"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "deleted table loads OK",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"set @@PERSIST.dolt_stats_branches ='main'",
|
||||
"CREATE table xy (x bigint primary key, y varchar(16))",
|
||||
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
|
||||
"analyze table xy",
|
||||
"CREATE table uv (u bigint primary key, v varchar(16))",
|
||||
"insert into uv values (0,'0'), (1,'1'), (2,'2')",
|
||||
"analyze table uv",
|
||||
"drop table uv",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select table_name, upper_bound from dolt_statistics",
|
||||
Expected: []sql.Row{{"xy", "2"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "differentiate branch names",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"set @@PERSIST.dolt_stats_branches ='main,feat'",
|
||||
"CREATE table xy (x bigint primary key, y varchar(16))",
|
||||
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
|
||||
"analyze table xy",
|
||||
"call dolt_checkout('-b', 'feat')",
|
||||
"CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
|
||||
"insert into xy values (3,'3',3)",
|
||||
"analyze table xy",
|
||||
"call dolt_checkout('main')",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select table_name, upper_bound from dolt_statistics",
|
||||
Expected: []sql.Row{{"xy", "2"}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_checkout('feat')",
|
||||
},
|
||||
{
|
||||
Query: "select table_name, upper_bound from dolt_statistics",
|
||||
Expected: []sql.Row{{"xy", "3"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "drop primary key",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"CREATE table xy (x bigint primary key, y varchar(16))",
|
||||
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
|
||||
"analyze table xy",
|
||||
@@ -682,10 +643,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
|
||||
Query: "insert into xy values ('3', '3')",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.2)",
|
||||
Query: "analyze table xy",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics group by table_name, index_name",
|
||||
@@ -699,9 +657,6 @@ var StatBranchTests = []queries.ScriptTest{
|
||||
{
|
||||
Name: "multi branch stats",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
|
||||
"set @@PERSIST.dolt_stats_branches = 'main,feat';",
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
|
||||
"call dolt_commit('-Am', 'xy')",
|
||||
@@ -713,10 +668,7 @@ var StatBranchTests = []queries.ScriptTest{
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
Query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
Query: "select table_name, index_name, row_count from dolt_statistics",
|
||||
@@ -751,7 +703,7 @@ var StatBranchTests = []queries.ScriptTest{
|
||||
Query: "call dolt_commit('-am', 'cm')",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
Query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
|
||||
@@ -769,30 +721,6 @@ var StatBranchTests = []queries.ScriptTest{
|
||||
{"xy", "y", uint64(6)},
|
||||
},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_checkout('feat')",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_stop()",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_drop()",
|
||||
},
|
||||
{
|
||||
Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
|
||||
Expected: []sql.Row{},
|
||||
},
|
||||
{
|
||||
// we dropped 'feat', not 'main'
|
||||
Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'",
|
||||
Expected: []sql.Row{
|
||||
{"xy", "primary", uint64(6)},
|
||||
{"xy", "y", uint64(6)},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -812,302 +740,3 @@ var StatBranchTests = []queries.ScriptTest{
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var StatProcTests = []queries.ScriptTest{
|
||||
{
|
||||
Name: "deleting stats removes information_schema access point",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy values (0,0,0)",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "analyze table xy",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from information_schema.column_statistics",
|
||||
Expected: []sql.Row{{2}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_drop()",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from information_schema.column_statistics",
|
||||
Expected: []sql.Row{{0}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "restart empty stats panic",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "analyze table xy",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics",
|
||||
Expected: []sql.Row{{0}},
|
||||
},
|
||||
{
|
||||
Query: "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0",
|
||||
Expected: []sql.Row{{}},
|
||||
},
|
||||
{
|
||||
Query: "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0",
|
||||
Expected: []sql.Row{{}},
|
||||
},
|
||||
{
|
||||
// don't panic
|
||||
Query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
},
|
||||
{
|
||||
Query: "insert into xy values (0,0,0)",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics",
|
||||
Expected: []sql.Row{{2}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "basic start, status, stop loop",
|
||||
SetUpScript: []string{
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics",
|
||||
Expected: []sql.Row{{0}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"no active stats thread"}},
|
||||
},
|
||||
// set refresh interval arbitrarily high to avoid updating when we restart
|
||||
{
|
||||
Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;",
|
||||
Expected: []sql.Row{{}},
|
||||
},
|
||||
{
|
||||
Query: "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0",
|
||||
Expected: []sql.Row{{}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"restarted thread: mydb"}},
|
||||
},
|
||||
{
|
||||
Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
|
||||
Expected: []sql.Row{{}},
|
||||
},
|
||||
// new restart picks up 0-interval, will start refreshing immediately
|
||||
{
|
||||
Query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"refreshed mydb"}},
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics",
|
||||
Expected: []sql.Row{{2}},
|
||||
},
|
||||
// kill refresh thread
|
||||
{
|
||||
Query: "call dolt_stats_stop()",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"cancelled thread: mydb"}},
|
||||
},
|
||||
// insert without refresh thread will not update stats
|
||||
{
|
||||
Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')",
|
||||
},
|
||||
{
|
||||
Query: "select sleep(.1)",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"cancelled thread: mydb"}},
|
||||
},
|
||||
// manual analyze will update stats
|
||||
{
|
||||
Query: "analyze table xy",
|
||||
Expected: []sql.Row{{"xy", "analyze", "status", "OK"}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"refreshed mydb"}},
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics",
|
||||
Expected: []sql.Row{{2}},
|
||||
},
|
||||
// kill refresh thread and delete stats ref
|
||||
{
|
||||
Query: "call dolt_stats_drop()",
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_status()",
|
||||
Expected: []sql.Row{{"dropped"}},
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics",
|
||||
Expected: []sql.Row{{0}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "test purge",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;",
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');",
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
|
||||
Expected: []sql.Row{{1}, {1}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_purge()",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics;",
|
||||
Expected: []sql.Row{{0}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "test prune",
|
||||
SetUpScript: []string{
|
||||
"set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;",
|
||||
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
|
||||
"insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');",
|
||||
"analyze table xy",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
|
||||
Expected: []sql.Row{{1}, {1}},
|
||||
},
|
||||
{
|
||||
Query: "call dolt_stats_prune()",
|
||||
},
|
||||
{
|
||||
Query: "select count(*) from dolt_statistics;",
|
||||
Expected: []sql.Row{{2}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// TestProviderReloadScriptWithEngine runs the test script given with the engine provided.
|
||||
func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) {
|
||||
ctx := enginetest.NewContext(harness)
|
||||
err := enginetest.CreateNewConnectionForServerEngine(ctx, e)
|
||||
require.NoError(t, err, nil)
|
||||
|
||||
t.Run(script.Name, func(t *testing.T) {
|
||||
for _, statement := range script.SetUpScript {
|
||||
if sh, ok := harness.(enginetest.SkippingHarness); ok {
|
||||
if sh.SkipQueryTest(statement) {
|
||||
t.Skip()
|
||||
}
|
||||
}
|
||||
ctx = ctx.WithQuery(statement)
|
||||
enginetest.RunQueryWithContext(t, e, harness, ctx, statement)
|
||||
}
|
||||
|
||||
assertions := script.Assertions
|
||||
if len(assertions) == 0 {
|
||||
assertions = []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: script.Query,
|
||||
Expected: script.Expected,
|
||||
ExpectedErr: script.ExpectedErr,
|
||||
ExpectedIndexes: script.ExpectedIndexes,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// reload provider, get disk stats
|
||||
eng, ok := e.(*gms.Engine)
|
||||
if !ok {
|
||||
t.Errorf("expected *gms.Engine but found: %T", e)
|
||||
}
|
||||
|
||||
branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
|
||||
brCopy := make([]string, len(branches))
|
||||
copy(brCopy, branches)
|
||||
err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
|
||||
require.NoError(t, err)
|
||||
for _, branch := range brCopy {
|
||||
err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, assertion := range assertions {
|
||||
t.Run(assertion.Query, func(t *testing.T) {
|
||||
if assertion.NewSession {
|
||||
th, ok := harness.(enginetest.TransactionHarness)
|
||||
require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+
|
||||
"but harness doesn't implement TransactionHarness")
|
||||
ctx = th.NewSession()
|
||||
}
|
||||
|
||||
if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) {
|
||||
t.Skip()
|
||||
}
|
||||
if assertion.Skip {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
if assertion.ExpectedErr != nil {
|
||||
enginetest.AssertErr(t, e, harness, assertion.Query, nil, assertion.ExpectedErr)
|
||||
} else if assertion.ExpectedErrStr != "" {
|
||||
enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, nil, assertion.ExpectedErrStr)
|
||||
} else if assertion.ExpectedWarning != 0 {
|
||||
enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query,
|
||||
assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount,
|
||||
assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck)
|
||||
} else if assertion.SkipResultsCheck {
|
||||
enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query)
|
||||
} else if assertion.CheckIndexedAccess {
|
||||
enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
|
||||
} else {
|
||||
var expected = assertion.Expected
|
||||
if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine {
|
||||
// TODO: remove this check in the future
|
||||
expected = nil
|
||||
}
|
||||
enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings, nil)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func mustNewStatQual(s string) sql.StatQualifier {
|
||||
qual, _ := sql.NewQualifierFromString(s)
|
||||
return qual
|
||||
}
|
||||
|
||||
@@ -292,7 +292,7 @@ type IndexScanBuilder interface {
|
||||
|
||||
// NewSecondaryIter returns an object used to perform secondary lookups
|
||||
// for index joins.
|
||||
NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen
|
||||
NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error)
|
||||
|
||||
// Key returns the table root for caching purposes
|
||||
Key() doltdb.DataCacheKey
|
||||
@@ -395,7 +395,10 @@ func newNonCoveringLookupBuilder(s *durableIndexState, b *baseIndexImplBuilder)
|
||||
"primary index passed, but only secondary indexes are supported")
|
||||
}
|
||||
|
||||
primary := durable.ProllyMapFromIndex(s.Primary)
|
||||
primary, err := durable.ProllyMapFromIndex(s.Primary)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
priKd, _ := primary.Descriptors()
|
||||
tbBld := val.NewTupleBuilder(priKd)
|
||||
pkMap := OrdinalMappingFromIndex(b.idx)
|
||||
@@ -452,7 +455,7 @@ func (ib *baseIndexImplBuilder) NewRangeMapIter(_ context.Context, _ prolly.Rang
|
||||
panic("cannot call NewMapIter on baseIndexImplBuilder")
|
||||
}
|
||||
|
||||
func (ib *baseIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
|
||||
func (ib *baseIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
|
||||
panic("cannot call NewSecondaryIter on baseIndexImplBuilder")
|
||||
}
|
||||
|
||||
@@ -628,11 +631,11 @@ func (ib *coveringIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part s
|
||||
}
|
||||
|
||||
// NewSecondaryIter implements IndexScanBuilder
|
||||
func (ib *coveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
|
||||
func (ib *coveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
|
||||
if strict {
|
||||
return &covStrictSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx}
|
||||
return &covStrictSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx}, nil
|
||||
} else {
|
||||
return &covLaxSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx, nullSafe: nullSafe}
|
||||
return &covLaxSecondaryLookupGen{m: ib.sec, prefixDesc: ib.secKd.PrefixDesc(cnt), index: ib.idx, nullSafe: nullSafe}, nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -735,11 +738,11 @@ func (ib *nonCoveringIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, par
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (ib *nonCoveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
|
||||
func (ib *nonCoveringIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
|
||||
if strict {
|
||||
return &nonCovStrictSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt)}
|
||||
return &nonCovStrictSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt)}, nil
|
||||
} else {
|
||||
return &nonCovLaxSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt), nullSafe: nullSafe}
|
||||
return &nonCovLaxSecondaryLookupGen{pri: ib.pri, sec: ib.sec, pkMap: ib.pkMap, pkBld: ib.pkBld, sch: ib.idx.tableSch, prefixDesc: ib.secKd.PrefixDesc(cnt), nullSafe: nullSafe}, nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -766,12 +769,18 @@ func (ib *keylessIndexImplBuilder) OutputSchema() schema.Schema {
|
||||
func (ib *keylessIndexImplBuilder) NewRangeMapIter(ctx context.Context, r prolly.Range, reverse bool) (prolly.MapIter, error) {
|
||||
rows := ib.s.Primary
|
||||
dsecondary := ib.s.Secondary
|
||||
secondary := durable.ProllyMapFromIndex(dsecondary)
|
||||
secondary, err := durable.ProllyMapFromIndex(dsecondary)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
indexIter, err := secondary.IterRange(ctx, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
clustered := durable.ProllyMapFromIndex(rows)
|
||||
clustered, err := durable.ProllyMapFromIndex(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
keyDesc := clustered.KeyDesc()
|
||||
indexMap := OrdinalMappingFromIndex(ib.idx)
|
||||
|
||||
@@ -832,12 +841,18 @@ func (ib *keylessIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part sq
|
||||
return newProllyKeylessIndexIter(ctx, ib.idx, prollyRange, doltgresRange, ib.sch, ib.projections, ib.s.Primary, ib.s.Secondary, reverse)
|
||||
}
|
||||
|
||||
func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
|
||||
pri := durable.ProllyMapFromIndex(ib.s.Primary)
|
||||
func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
|
||||
pri, err := durable.ProllyMapFromIndex(ib.s.Primary)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pkDesc, _ := pri.Descriptors()
|
||||
pkBld := val.NewTupleBuilder(pkDesc)
|
||||
|
||||
secondary := durable.ProllyMapFromIndex(ib.s.Secondary)
|
||||
secondary, err := durable.ProllyMapFromIndex(ib.s.Secondary)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &keylessSecondaryLookupGen{
|
||||
pri: pri,
|
||||
@@ -846,7 +861,7 @@ func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSa
|
||||
pkMap: OrdinalMappingFromIndex(ib.idx),
|
||||
pkBld: pkBld,
|
||||
prefixDesc: secondary.KeyDesc().PrefixDesc(cnt),
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
type nomsIndexImplBuilder struct {
|
||||
@@ -870,7 +885,7 @@ func (ib *nomsIndexImplBuilder) NewRangeMapIter(ctx context.Context, r prolly.Ra
|
||||
panic("cannot call NewMapIter on *nomsIndexImplBuilder")
|
||||
}
|
||||
|
||||
func (ib *nomsIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen {
|
||||
func (ib *nomsIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) (SecondaryLookupIterGen, error) {
|
||||
panic("cannot call NewSecondaryIter on *nomsIndexImplBuilder")
|
||||
}
|
||||
|
||||
|
||||
@@ -59,13 +59,20 @@ func newProllyIndexIter(
|
||||
projections []uint64,
|
||||
dprimary, dsecondary durable.Index,
|
||||
) (prollyIndexIter, error) {
|
||||
secondary := durable.ProllyMapFromIndex(dsecondary)
|
||||
secondary, err := durable.ProllyMapFromIndex(dsecondary)
|
||||
if err != nil {
|
||||
return prollyIndexIter{}, err
|
||||
}
|
||||
|
||||
indexIter, err := secondary.IterRange(ctx, rng)
|
||||
if err != nil {
|
||||
return prollyIndexIter{}, err
|
||||
}
|
||||
|
||||
primary := durable.ProllyMapFromIndex(dprimary)
|
||||
primary, err := durable.ProllyMapFromIndex(dprimary)
|
||||
if err != nil {
|
||||
return prollyIndexIter{}, err
|
||||
}
|
||||
kd, _ := primary.Descriptors()
|
||||
pkBld := val.NewTupleBuilder(kd)
|
||||
pkMap := OrdinalMappingFromIndex(idx)
|
||||
@@ -183,7 +190,10 @@ func newProllyCoveringIndexIter(
|
||||
projections []uint64,
|
||||
indexdata durable.Index,
|
||||
) (prollyCoveringIndexIter, error) {
|
||||
secondary := durable.ProllyMapFromIndex(indexdata)
|
||||
secondary, err := durable.ProllyMapFromIndex(indexdata)
|
||||
if err != nil {
|
||||
return prollyCoveringIndexIter{}, err
|
||||
}
|
||||
indexIter, err := secondary.IterRange(ctx, rng)
|
||||
if err != nil {
|
||||
return prollyCoveringIndexIter{}, err
|
||||
@@ -293,9 +303,11 @@ type prollyKeylessIndexIter struct {
|
||||
var _ sql.RowIter = prollyKeylessIndexIter{}
|
||||
|
||||
func newProllyKeylessIndexIter(ctx *sql.Context, idx DoltIndex, rng prolly.Range, doltgresRange *DoltgresRange, pkSch sql.PrimaryKeySchema, projections []uint64, rows, dsecondary durable.Index, reverse bool) (prollyKeylessIndexIter, error) {
|
||||
secondary := durable.ProllyMapFromIndex(dsecondary)
|
||||
secondary, err := durable.ProllyMapFromIndex(dsecondary)
|
||||
if err != nil {
|
||||
return prollyKeylessIndexIter{}, err
|
||||
}
|
||||
var indexIter prolly.MapIter
|
||||
var err error
|
||||
if doltgresRange == nil {
|
||||
if reverse {
|
||||
indexIter, err = secondary.IterRangeReverse(ctx, rng)
|
||||
@@ -312,7 +324,10 @@ func newProllyKeylessIndexIter(ctx *sql.Context, idx DoltIndex, rng prolly.Range
|
||||
}
|
||||
}
|
||||
|
||||
clustered := durable.ProllyMapFromIndex(rows)
|
||||
clustered, err := durable.ProllyMapFromIndex(rows)
|
||||
if err != nil {
|
||||
return prollyKeylessIndexIter{}, err
|
||||
}
|
||||
keyDesc, valDesc := clustered.Descriptors()
|
||||
indexMap := OrdinalMappingFromIndex(idx)
|
||||
keyBld := val.NewTupleBuilder(keyDesc)
|
||||
|
||||
@@ -364,7 +364,10 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M
|
||||
if rowData.Format() != types.Format_DOLT {
|
||||
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, nil
|
||||
}
|
||||
priMap = durable.ProllyMapFromIndex(rowData)
|
||||
priMap, err = durable.ProllyMapFromIndex(rowData)
|
||||
if err != nil {
|
||||
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
|
||||
}
|
||||
|
||||
priSch = lb.OutputSchema()
|
||||
|
||||
@@ -384,7 +387,7 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M
|
||||
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
|
||||
}
|
||||
} else {
|
||||
dstIter = lb.NewSecondaryIter(n.IsStrictLookup(), len(n.Expressions()), n.NullMask())
|
||||
dstIter, _ = lb.NewSecondaryIter(n.IsStrictLookup(), len(n.Expressions()), n.NullMask())
|
||||
}
|
||||
|
||||
case *plan.ResolvedTable:
|
||||
@@ -414,7 +417,10 @@ func getSourceKv(ctx *sql.Context, n sql.Node, isSrc bool) (prolly.Map, prolly.M
|
||||
if err != nil {
|
||||
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
|
||||
}
|
||||
priMap = durable.ProllyMapFromIndex(priIndex)
|
||||
priMap, err = durable.ProllyMapFromIndex(priIndex)
|
||||
if err != nil {
|
||||
return prolly.Map{}, prolly.Map{}, nil, nil, nil, nil, nil, nil, err
|
||||
}
|
||||
secMap = priMap
|
||||
|
||||
srcIter, err = priMap.IterAll(ctx)
|
||||
@@ -535,7 +541,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) {
|
||||
if err != nil {
|
||||
return ms, err
|
||||
}
|
||||
ms.idxMap = durable.ProllyMapFromIndex(secIdx)
|
||||
ms.idxMap, err = durable.ProllyMapFromIndex(secIdx)
|
||||
if err != nil {
|
||||
return mergeState{}, err
|
||||
}
|
||||
table, err = doltTable.DoltTable(ctx)
|
||||
if err != nil {
|
||||
return ms, err
|
||||
@@ -560,7 +569,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) {
|
||||
if err != nil {
|
||||
return ms, err
|
||||
}
|
||||
ms.idxMap = durable.ProllyMapFromIndex(priIndex)
|
||||
ms.idxMap, err = durable.ProllyMapFromIndex(priIndex)
|
||||
if err != nil {
|
||||
return mergeState{}, err
|
||||
}
|
||||
secIterGen = index.NewKeylessIndexImplBuilder(priIndex, secIdx, idx)
|
||||
} else {
|
||||
secIterGen = index.NewSecondaryIterGen(ms.idxMap)
|
||||
@@ -584,7 +596,10 @@ func getMergeKv(ctx *sql.Context, n sql.Node) (mergeState, error) {
|
||||
return ms, err
|
||||
}
|
||||
|
||||
priMap := durable.ProllyMapFromIndex(priIndex)
|
||||
priMap, err := durable.ProllyMapFromIndex(priIndex)
|
||||
if err != nil {
|
||||
return ms, err
|
||||
}
|
||||
pkMap := index.OrdinalMappingFromIndex(idx)
|
||||
priKd, _ := priMap.Descriptors()
|
||||
pkBld := val.NewTupleBuilder(priKd)
|
||||
|
||||
@@ -33,7 +33,6 @@ import (
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
dsql "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
@@ -144,11 +143,10 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error {
|
||||
return err
|
||||
}
|
||||
|
||||
statsPro := statspro.NewProvider(pro.(*dsql.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(env.NewGRPCDialProviderFromDoltEnv(dEnv)))
|
||||
gcSafepointController := dsess.NewGCSafepointController()
|
||||
|
||||
config, _ := dEnv.Config.GetConfig(env.GlobalConfig)
|
||||
sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statsPro, gcSafepointController)
|
||||
sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.StatsNoop{}, gcSafepointController)
|
||||
h.sess = sqlCtx.Session.(*dsess.DoltSession)
|
||||
|
||||
dbs := h.engine.Analyzer.Catalog.AllDatabases(sqlCtx)
|
||||
|
||||
@@ -183,7 +183,11 @@ func ProllyRowIterFromPartition(
|
||||
projections []uint64,
|
||||
partition doltTablePartition,
|
||||
) (sql.RowIter, error) {
|
||||
rows := durable.ProllyMapFromIndex(partition.rowData)
|
||||
rows, err := durable.ProllyMapFromIndex(partition.rowData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c, err := rows.Count()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -243,7 +247,10 @@ func DoltTablePartitionToRowIter(ctx *sql.Context, name string, table *doltdb.Ta
|
||||
}
|
||||
|
||||
if types.IsFormat_DOLT(data.Format()) {
|
||||
idx := durable.ProllyMapFromIndex(data)
|
||||
idx, err := durable.ProllyMapFromIndex(data)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
c, err := idx.Count()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
||||
@@ -1127,6 +1127,7 @@ func newTestEngine(ctx context.Context, dEnv *env.DoltEnv) (*gms.Engine, *sql.Co
|
||||
IsServerLocked: false,
|
||||
}), sqlCtx
|
||||
}
|
||||
|
||||
func TestIndexOverwrite(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dEnv := dtestutils.CreateTestEnv()
|
||||
|
||||
@@ -1,489 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statsnoms
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/earl"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/datas"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory {
|
||||
return &NomsStatsFactory{dialPro: dialPro}
|
||||
}
|
||||
|
||||
type NomsStatsFactory struct {
|
||||
dialPro dbfactory.GRPCDialProvider
|
||||
}
|
||||
|
||||
var _ statspro.StatsFactory = NomsStatsFactory{}
|
||||
|
||||
func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) {
|
||||
params := make(map[string]interface{})
|
||||
params[dbfactory.GRPCDialProviderParam] = sf.dialPro
|
||||
|
||||
var urlPath string
|
||||
u, err := earl.Parse(prov.DbFactoryUrl())
|
||||
if u.Scheme == dbfactory.MemScheme {
|
||||
urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir)
|
||||
} else if u.Scheme == dbfactory.FileScheme {
|
||||
urlPath = doltdb.LocalDirDoltDB
|
||||
}
|
||||
|
||||
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var dEnv *env.DoltEnv
|
||||
exists, isDir := statsFs.Exists("")
|
||||
if !exists {
|
||||
err := statsFs.MkDirs("")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
|
||||
}
|
||||
|
||||
dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test")
|
||||
sess := dsess.DSessFromSess(ctx.Session)
|
||||
err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if !isDir {
|
||||
return nil, fmt.Errorf("file exists where the dolt stats directory should be")
|
||||
} else {
|
||||
dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "", "")
|
||||
}
|
||||
|
||||
dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params)
|
||||
|
||||
deaf := dEnv.DbEaFactory(ctx)
|
||||
|
||||
tmpDir, err := dEnv.TempTableFilesDir()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
opts := editor.Options{
|
||||
Deaf: deaf,
|
||||
Tempdir: tmpDir,
|
||||
}
|
||||
statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewNomsStats(sourceDb, statsDb), nil
|
||||
}
|
||||
|
||||
func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase {
|
||||
return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb}
|
||||
}
|
||||
|
||||
type dbStats map[sql.StatQualifier]*statspro.DoltStats
|
||||
|
||||
type NomsStatsDatabase struct {
|
||||
mu *sync.Mutex
|
||||
destDb dsess.SqlDatabase
|
||||
sourceDb dsess.SqlDatabase
|
||||
stats []dbStats
|
||||
branches []string
|
||||
tableHashes []map[string]hash.Hash
|
||||
schemaHashes []map[string]hash.Hash
|
||||
dirty []*prolly.MutableMap
|
||||
}
|
||||
|
||||
var _ statspro.Database = (*NomsStatsDatabase)(nil)
|
||||
|
||||
func (n *NomsStatsDatabase) Close() error {
|
||||
return n.destDb.DbData().Ddb.Close()
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) Branches() []string {
|
||||
return n.branches
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
|
||||
branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)
|
||||
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
|
||||
if err != nil {
|
||||
ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
|
||||
return nil
|
||||
}
|
||||
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
|
||||
if !ok {
|
||||
return fmt.Errorf("branch/database not found: %s", branchQDbName)
|
||||
}
|
||||
|
||||
if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
|
||||
return err
|
||||
} else if ok {
|
||||
ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
|
||||
if err := n.DeleteBranchStats(ctx, branch, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch)
|
||||
if errors.Is(err, doltdb.ErrNoStatistics) {
|
||||
return n.trackBranch(ctx, branch)
|
||||
} else if errors.Is(err, datas.ErrNoBranchStats) {
|
||||
return n.trackBranch(ctx, branch)
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
if cnt, err := statsMap.Count(); err != nil {
|
||||
return err
|
||||
} else if cnt == 0 {
|
||||
return n.trackBranch(ctx, branch)
|
||||
}
|
||||
|
||||
doltStats, err := loadStats(ctx, branchQDb, statsMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.branches = append(n.branches, branch)
|
||||
n.stats = append(n.stats, doltStats)
|
||||
n.dirty = append(n.dirty, nil)
|
||||
n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
|
||||
n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
|
||||
root, err := branchQDb.GetRoot(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
tables, err := branchQDb.GetTableNames(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var keys []string
|
||||
var schHashes []hash.Hash
|
||||
for _, tableName := range tables {
|
||||
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName})
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
curHash, err := table.GetSchemaHash(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
keys = append(keys, n.schemaTupleKey(branch, tableName))
|
||||
schHashes = append(schHashes, curHash)
|
||||
}
|
||||
|
||||
ddb := n.destDb.DbData().Ddb
|
||||
var schemaChange bool
|
||||
for i, key := range keys {
|
||||
curHash := schHashes[i]
|
||||
if val, ok, err := ddb.GetTuple(ctx, key); err != nil {
|
||||
return false, err
|
||||
} else if ok {
|
||||
oldHash := hash.Parse(string(val))
|
||||
if !ok || !oldHash.Equal(curHash) {
|
||||
schemaChange = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if schemaChange {
|
||||
for _, key := range keys {
|
||||
ddb.DeleteTuple(ctx, key)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats {
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(b, branch) {
|
||||
return n.stats[i]
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
stats := n.getBranchStats(branch)
|
||||
ret, ok := stats[qual]
|
||||
return ret, ok
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
stats := n.getBranchStats(branch)
|
||||
var ret []sql.StatQualifier
|
||||
for qual, _ := range stats {
|
||||
ret = append(ret, qual)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) setStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
|
||||
var statsMap *prolly.MutableMap
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(branch, b) {
|
||||
n.stats[i][qual] = stats
|
||||
if n.dirty[i] == nil {
|
||||
if err := n.initMutable(ctx, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
statsMap = n.dirty[i]
|
||||
}
|
||||
}
|
||||
if statsMap == nil {
|
||||
if err := n.trackBranch(ctx, branch); err != nil {
|
||||
return err
|
||||
}
|
||||
statsMap = n.dirty[len(n.branches)-1]
|
||||
n.stats[len(n.branches)-1][qual] = stats
|
||||
}
|
||||
|
||||
return n.replaceStats(ctx, statsMap, stats)
|
||||
}
|
||||
func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
return n.setStat(ctx, branch, qual, stats)
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error {
|
||||
n.branches = append(n.branches, branch)
|
||||
n.stats = append(n.stats, make(dbStats))
|
||||
n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
|
||||
n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
|
||||
|
||||
ns := n.destDb.DbData().Ddb.NodeStore()
|
||||
kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors(ns)
|
||||
newMap, err := prolly.NewMapFromTuples(ctx, ns, kd, vd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.dirty = append(n.dirty, newMap.Mutate())
|
||||
return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf())
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error {
|
||||
statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.dirty[i] = statsMap.Mutate()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(b, branch) {
|
||||
for _, qual := range quals {
|
||||
ctx.GetLogger().Debugf("statistics refresh: deleting index statistics: %s/%s", branch, qual)
|
||||
delete(n.stats[i], qual)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
ctx.GetLogger().Debugf("statistics refresh: deleting branch statistics: %s", branch)
|
||||
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(b, branch) {
|
||||
n.branches = append(n.branches[:i], n.branches[i+1:]...)
|
||||
n.dirty = append(n.dirty[:i], n.dirty[i+1:]...)
|
||||
n.stats = append(n.stats[:i], n.stats[i+1:]...)
|
||||
n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...)
|
||||
n.schemaHashes = append(n.schemaHashes[:i], n.schemaHashes[i+1:]...)
|
||||
}
|
||||
}
|
||||
if flush {
|
||||
return n.destDb.DbData().Ddb.DropStatisics(ctx, branch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
var dbStat dbStats
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(b, branch) {
|
||||
// naive merge the new with old
|
||||
dbStat = n.stats[i]
|
||||
}
|
||||
}
|
||||
|
||||
if dbStat == nil {
|
||||
if err := n.trackBranch(ctx, branch); err != nil {
|
||||
return err
|
||||
}
|
||||
dbStat = n.stats[len(n.branches)-1]
|
||||
}
|
||||
|
||||
if _, ok := dbStat[qual]; ok {
|
||||
oldChunks := dbStat[qual].Hist
|
||||
targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newStat, err := dbStat[qual].WithHistogram(targetBuckets)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dbStat[qual] = newStat.(*statspro.DoltStats)
|
||||
} else {
|
||||
dbStat[qual] = statspro.NewDoltStats()
|
||||
}
|
||||
dbStat[qual].Chunks = targetHashes
|
||||
dbStat[qual].UpdateActive()
|
||||
|
||||
// let |n.SetStats| update memory and disk
|
||||
return n.setStat(ctx, branch, qual, dbStat[qual])
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(b, branch) {
|
||||
if n.dirty[i] != nil {
|
||||
flushedMap, err := n.dirty[i].Map(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.dirty[i] = nil
|
||||
if err := n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(branch, b) {
|
||||
return n.tableHashes[i][tableName]
|
||||
}
|
||||
}
|
||||
return hash.Hash{}
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(branch, b) {
|
||||
n.tableHashes[i][tableName] = h
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(branch, b) {
|
||||
return n.schemaHashes[i][tableName], nil
|
||||
}
|
||||
if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
|
||||
if err != nil {
|
||||
return hash.Hash{}, err
|
||||
}
|
||||
h := hash.Parse(string(val))
|
||||
n.schemaHashes[i][tableName] = h
|
||||
return h, nil
|
||||
} else if err != nil {
|
||||
return hash.Hash{}, err
|
||||
}
|
||||
break
|
||||
}
|
||||
return hash.Hash{}, nil
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
|
||||
return n.sourceDb.Name() + "/" + branch + "/" + tableName
|
||||
}
|
||||
|
||||
func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
branchIdx := -1
|
||||
for i, b := range n.branches {
|
||||
if strings.EqualFold(branch, b) {
|
||||
branchIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if branchIdx < 0 {
|
||||
branchIdx = len(n.branches)
|
||||
if err := n.trackBranch(ctx, branch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
n.schemaHashes[branchIdx][tableName] = h
|
||||
key := n.schemaTupleKey(branch, tableName)
|
||||
if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
|
||||
return err
|
||||
}
|
||||
|
||||
return n.destDb.DbData().Ddb.SetTuple(ctx, key, []byte(h.String()))
|
||||
}
|
||||
@@ -1,176 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statsnoms
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/planbuilder"
|
||||
"gopkg.in/errgo.v2/errors"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
var ErrIncompatibleVersion = errors.New("client stats version mismatch")
|
||||
|
||||
func NewStatsIter(ctx *sql.Context, schemaName string, m prolly.Map) (*statsIter, error) {
|
||||
iter, err := m.IterAll(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
kd, vd := m.Descriptors()
|
||||
keyBuilder := val.NewTupleBuilder(kd)
|
||||
valueBuilder := val.NewTupleBuilder(vd)
|
||||
ns := m.NodeStore()
|
||||
|
||||
return &statsIter{
|
||||
iter: iter,
|
||||
kb: keyBuilder,
|
||||
vb: valueBuilder,
|
||||
ns: ns,
|
||||
schemaName: schemaName,
|
||||
planb: planbuilder.New(ctx, nil, nil, nil),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// statsIter reads histogram buckets into string-compatible types.
|
||||
// Values that are SQL rows should be converted with statsIter.ParseRow.
|
||||
// todo: make a JSON compatible container for sql.Row w/ types so that we
|
||||
// can eagerly convert to sql.Row without sacrificing string printing.
|
||||
type statsIter struct {
|
||||
iter prolly.MapIter
|
||||
kb, vb *val.TupleBuilder
|
||||
ns tree.NodeStore
|
||||
planb *planbuilder.Builder
|
||||
currentQual string
|
||||
schemaName string
|
||||
currentTypes []sql.Type
|
||||
}
|
||||
|
||||
var _ sql.RowIter = (*statsIter)(nil)
|
||||
|
||||
// Next implements sql.RowIter. It reads the next bucket entry from the
// stats map, validates the on-disk stats version, and returns the
// bucket as a row of string-compatible display values (see the type
// comment on statsIter).
func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) {
	k, v, err := s.iter.Next(ctx)
	if err != nil {
		return nil, err
	}

	// deserialize K, V
	// value field 0 holds the stats format version; refuse rows written
	// by an incompatible writer
	version, err := tree.GetField(ctx, s.vb.Desc, 0, v, s.ns)
	if err != nil {
		return nil, err
	}
	if version != schema.StatsVersion {
		return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
	}

	// flatten key fields followed by value fields into a single row,
	// indexed by the schema.Stats*Tag constants used below
	var row sql.Row
	for i := 0; i < s.kb.Desc.Count(); i++ {
		f, err := tree.GetField(ctx, s.kb.Desc, i, k, s.ns)
		if err != nil {
			return nil, err
		}
		row = append(row, f)
	}

	for i := 0; i < s.vb.Desc.Count(); i++ {
		f, err := tree.GetField(ctx, s.vb.Desc, i, v, s.ns)
		if err != nil {
			return nil, err
		}
		row = append(row, f)
	}

	dbName := row[schema.StatsDbTag].(string)
	tableName := row[schema.StatsTableTag].(string)
	indexName := row[schema.StatsIndexTag].(string)
	position := row[schema.StatsPositionTag].(int64)
	_ = row[schema.StatsVersionTag]
	commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
	rowCount := row[schema.StatsRowCountTag].(int64)
	distinctCount := row[schema.StatsDistinctCountTag].(int64)
	nullCount := row[schema.StatsNullCountTag].(int64)
	columnsStr := row[schema.StatsColumnsTag].(string)
	typesStr := row[schema.StatsTypesTag].(string)
	upperBoundStr := row[schema.StatsUpperBoundTag].(string)
	upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64)
	createdAt := row[schema.StatsCreatedAtTag].(time.Time)

	// types are stored newline-separated; trim stray whitespace
	typs := strings.Split(typesStr, "\n")
	for i, t := range typs {
		typs[i] = strings.TrimSpace(t)
	}

	// buckets for one index are stored contiguously, so the type list
	// only needs re-parsing when the qualifier changes
	qual := sql.NewStatQualifier(dbName, s.schemaName, tableName, indexName)
	if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) {
		s.currentQual = curQual
		s.currentTypes, err = parseTypeStrings(typs)
		if err != nil {
			return nil, err
		}
	}

	mcvCountsStr := row[schema.StatsMcvCountsTag].(string)

	// MCV columns occupy the contiguous tag range [StatsMcv1Tag, StatsMcvCountsTag)
	numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
	mcvs := make([]string, numMcvs)
	for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
		if v != nil {
			mcvs[i] = v.(string)
		}
	}

	// NOTE(review): the literal mcvs[0..3] below assumes numMcvs == 4 —
	// confirm against the schema tag layout
	return sql.Row{
		dbName,
		tableName,
		indexName,
		int(position),
		version,
		commit.String(),
		uint64(rowCount),
		uint64(distinctCount),
		uint64(nullCount),
		columnsStr,
		typesStr,
		upperBoundStr,
		uint64(upperBoundCnt),
		createdAt,
		mcvs[0], mcvs[1], mcvs[2], mcvs[3],
		mcvCountsStr,
	}, nil
}
|
||||
|
||||
func (s *statsIter) ParseRow(rowStr string) (sql.Row, error) {
|
||||
var row sql.Row
|
||||
for i, v := range strings.Split(rowStr, ",") {
|
||||
val, _, err := s.currentTypes[i].Convert(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
row = append(row, val)
|
||||
}
|
||||
return row, nil
|
||||
}
|
||||
|
||||
func (s *statsIter) Close(context *sql.Context) error {
|
||||
return nil
|
||||
}
|
||||
@@ -1,308 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statsnoms
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/planbuilder"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) {
|
||||
qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats)
|
||||
schemaName := db.SchemaName()
|
||||
iter, err := NewStatsIter(ctx, schemaName, m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
currentStat := statspro.NewDoltStats()
|
||||
invalidTables := make(map[string]bool)
|
||||
for {
|
||||
row, err := iter.Next(ctx)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// deserialize K, V
|
||||
dbName := row[schema.StatsDbTag].(string)
|
||||
tableName := row[schema.StatsTableTag].(string)
|
||||
indexName := row[schema.StatsIndexTag].(string)
|
||||
_ = row[schema.StatsVersionTag]
|
||||
commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
|
||||
rowCount := row[schema.StatsRowCountTag].(uint64)
|
||||
distinctCount := row[schema.StatsDistinctCountTag].(uint64)
|
||||
nullCount := row[schema.StatsNullCountTag].(uint64)
|
||||
columns := strings.Split(row[schema.StatsColumnsTag].(string), ",")
|
||||
typesStr := row[schema.StatsTypesTag].(string)
|
||||
boundRowStr := row[schema.StatsUpperBoundTag].(string)
|
||||
upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64)
|
||||
createdAt := row[schema.StatsCreatedAtTag].(time.Time)
|
||||
|
||||
typs := strings.Split(typesStr, "\n")
|
||||
for i, t := range typs {
|
||||
typs[i] = strings.TrimSpace(t)
|
||||
}
|
||||
|
||||
qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
|
||||
if _, ok := invalidTables[tableName]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if currentStat.Statistic.Qual.String() != qual.String() {
|
||||
if !currentStat.Statistic.Qual.Empty() {
|
||||
currentStat.UpdateActive()
|
||||
qualToStats[currentStat.Statistic.Qual] = currentStat
|
||||
}
|
||||
|
||||
currentStat = statspro.NewDoltStats()
|
||||
|
||||
tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
|
||||
if ok {
|
||||
currentStat.Statistic.Qual = qual
|
||||
currentStat.Statistic.Cols = columns
|
||||
currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if !ok {
|
||||
ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
|
||||
invalidTables[qual.Table()] = true
|
||||
continue
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
|
||||
|
||||
mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",")
|
||||
mcvCnts := make([]uint64, numMcvs)
|
||||
for i, v := range mcvCountsStr {
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
val, err := strconv.Atoi(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mcvCnts[i] = uint64(val)
|
||||
}
|
||||
|
||||
mcvs := make([]sql.Row, numMcvs)
|
||||
for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
|
||||
if v != nil && v != "" {
|
||||
row, err := DecodeRow(ctx, m.NodeStore(), v.(string), currentStat.Tb)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mcvs[i] = row
|
||||
}
|
||||
}
|
||||
|
||||
for i, v := range mcvCnts {
|
||||
if v == 0 {
|
||||
mcvs = mcvs[:i]
|
||||
mcvCnts = mcvCnts[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if currentStat.Statistic.Hist == nil {
|
||||
currentStat.Statistic.Typs, err = parseTypeStrings(typs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
currentStat.Statistic.Qual = qual
|
||||
}
|
||||
|
||||
boundRow, err := DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bucket := statspro.DoltBucket{
|
||||
Chunk: commit,
|
||||
Created: createdAt,
|
||||
Bucket: &stats.Bucket{
|
||||
RowCnt: uint64(rowCount),
|
||||
DistinctCnt: uint64(distinctCount),
|
||||
NullCnt: uint64(nullCount),
|
||||
McvVals: mcvs,
|
||||
McvsCnt: mcvCnts,
|
||||
BoundCnt: upperBoundCnt,
|
||||
BoundVal: boundRow,
|
||||
},
|
||||
}
|
||||
|
||||
currentStat.Hist = append(currentStat.Hist, bucket)
|
||||
currentStat.Statistic.RowCnt += uint64(rowCount)
|
||||
currentStat.Statistic.DistinctCnt += uint64(distinctCount)
|
||||
currentStat.Statistic.NullCnt += uint64(rowCount)
|
||||
if currentStat.Statistic.Created.Before(createdAt) {
|
||||
currentStat.Statistic.Created = createdAt
|
||||
}
|
||||
}
|
||||
if !currentStat.Qualifier().Empty() {
|
||||
currentStat.UpdateActive()
|
||||
qualToStats[currentStat.Statistic.Qual] = currentStat
|
||||
}
|
||||
return qualToStats, nil
|
||||
}
|
||||
|
||||
func parseTypeStrings(typs []string) ([]sql.Type, error) {
|
||||
var ret []sql.Type
|
||||
for _, typ := range typs {
|
||||
ct, err := planbuilder.ParseColumnTypeString(typ)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret = append(ret, ct)
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
|
||||
root, err := db.GetRoot(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
|
||||
iat, ok := sqlTable.(sql.IndexAddressable)
|
||||
if !ok {
|
||||
return nil, nil, nil, sql.ColSet{}, nil
|
||||
}
|
||||
|
||||
indexes, err := iat.GetIndexes(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
|
||||
var sqlIdx sql.Index
|
||||
for _, i := range indexes {
|
||||
if strings.EqualFold(i.ID(), qual.Index()) {
|
||||
sqlIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if sqlIdx == nil {
|
||||
return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
|
||||
}
|
||||
|
||||
fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
|
||||
if !ok {
|
||||
return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
|
||||
var idx durable.Index
|
||||
if qual.Index() == "primary" {
|
||||
idx, err = table.GetRowData(ctx)
|
||||
} else {
|
||||
idx, err = table.GetIndexRowData(ctx, qual.Index())
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
|
||||
prollyMap := durable.ProllyMapFromIndex(idx)
|
||||
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
|
||||
buffPool := prollyMap.NodeStore().Pool()
|
||||
|
||||
if cnt, err := prollyMap.Count(); err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
} else if cnt == 0 {
|
||||
return nil, keyBuilder, nil, sql.ColSet{}, nil
|
||||
}
|
||||
firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
keyBytes, _, err := firstIter.Next(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
for i := range keyBuilder.Desc.Types {
|
||||
keyBuilder.PutRaw(i, keyBytes.GetField(i))
|
||||
}
|
||||
|
||||
firstKey := keyBuilder.Build(buffPool)
|
||||
firstRow := make(sql.Row, keyBuilder.Desc.Count())
|
||||
for i := 0; i < keyBuilder.Desc.Count(); i++ {
|
||||
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
|
||||
if err != nil {
|
||||
return nil, nil, nil, sql.ColSet{}, err
|
||||
}
|
||||
}
|
||||
return firstRow, keyBuilder, fds, colset, nil
|
||||
}
|
||||
|
||||
func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
|
||||
tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
|
||||
if err != nil {
|
||||
return nil, sql.ColSet{}, err
|
||||
} else if !ok {
|
||||
return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table())
|
||||
}
|
||||
|
||||
iat, ok := tab.(sql.IndexAddressable)
|
||||
if !ok {
|
||||
return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table())
|
||||
}
|
||||
|
||||
indexes, err := iat.GetIndexes(ctx)
|
||||
if err != nil {
|
||||
return nil, sql.ColSet{}, err
|
||||
}
|
||||
|
||||
var idx sql.Index
|
||||
for _, i := range indexes {
|
||||
if strings.EqualFold(i.ID(), qual.Index()) {
|
||||
idx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if idx == nil {
|
||||
return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
|
||||
}
|
||||
|
||||
return stats.IndexFds(qual.Table(), tab.Schema(), idx)
|
||||
}
|
||||
@@ -1,181 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statsnoms
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
"github.com/dolthub/go-mysql-server/sql/types"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// About ~200 20 byte address fit in a ~4k chunk. Chunk sizes
// are approximate, but certainly shouldn't reach the square
// of the expected size.
const maxBucketFanout = 200 * 200

// mcvsTypes are the types passed to stats.StringifyKey when persisting
// MCV count rows (see putIndexRows).
var mcvsTypes = []sql.Type{types.Int64, types.Int64, types.Int64}
|
||||
|
||||
func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
|
||||
if err := deleteIndexRows(ctx, statsMap, dStats); err != nil {
|
||||
return err
|
||||
}
|
||||
return putIndexRows(ctx, statsMap, dStats)
|
||||
}
|
||||
|
||||
func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
sch := schema.StatsTableDoltSchema
|
||||
kd, _ := sch.GetMapDescriptors(statsMap.NodeStore())
|
||||
|
||||
keyBuilder := val.NewTupleBuilder(kd)
|
||||
|
||||
qual := dStats.Qualifier()
|
||||
pool := statsMap.NodeStore().Pool()
|
||||
|
||||
// delete previous entries for this index -> (db, table, index, pos)
|
||||
keyBuilder.PutString(0, qual.Database)
|
||||
keyBuilder.PutString(1, qual.Table())
|
||||
keyBuilder.PutString(2, qual.Index())
|
||||
keyBuilder.PutInt64(3, 0)
|
||||
firstKey := keyBuilder.Build(pool)
|
||||
keyBuilder.PutString(0, qual.Database)
|
||||
keyBuilder.PutString(1, qual.Table())
|
||||
keyBuilder.PutString(2, qual.Index())
|
||||
keyBuilder.PutInt64(3, maxBucketFanout+1)
|
||||
maxKey := keyBuilder.Build(pool)
|
||||
|
||||
// there is a limit on the number of buckets for a given index, iter
|
||||
// will terminate before maxBucketFanout
|
||||
iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
k, _, err := iter.Next(ctx)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
err = statsMap.Put(ctx, k, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
sch := schema.StatsTableDoltSchema
|
||||
kd, vd := sch.GetMapDescriptors(statsMap.NodeStore())
|
||||
|
||||
keyBuilder := val.NewTupleBuilder(kd)
|
||||
valueBuilder := val.NewTupleBuilder(vd)
|
||||
|
||||
qual := dStats.Qualifier()
|
||||
pool := statsMap.NodeStore().Pool()
|
||||
|
||||
// now add new buckets
|
||||
typesB := strings.Builder{}
|
||||
sep := ""
|
||||
for _, t := range dStats.Statistic.Typs {
|
||||
typesB.WriteString(sep + t.String())
|
||||
sep = "\n"
|
||||
}
|
||||
typesStr := typesB.String()
|
||||
|
||||
var pos int64
|
||||
for _, h := range dStats.Hist {
|
||||
keyBuilder.PutString(0, qual.Database)
|
||||
keyBuilder.PutString(1, qual.Tab)
|
||||
keyBuilder.PutString(2, qual.Idx)
|
||||
keyBuilder.PutInt64(3, pos)
|
||||
|
||||
valueBuilder.PutInt64(0, schema.StatsVersion)
|
||||
valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String())
|
||||
valueBuilder.PutInt64(2, int64(h.RowCount()))
|
||||
valueBuilder.PutInt64(3, int64(h.DistinctCount()))
|
||||
valueBuilder.PutInt64(4, int64(h.NullCount()))
|
||||
valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
|
||||
valueBuilder.PutString(6, typesStr)
|
||||
boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
valueBuilder.PutString(7, string(boundRow))
|
||||
valueBuilder.PutInt64(8, int64(h.BoundCount()))
|
||||
valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
|
||||
for i, r := range h.Mcvs() {
|
||||
mcvRow, err := EncodeRow(ctx, statsMap.NodeStore(), r, dStats.Tb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
valueBuilder.PutString(10+i, string(mcvRow))
|
||||
}
|
||||
var mcvCntsRow sql.Row
|
||||
for _, v := range h.McvCounts() {
|
||||
mcvCntsRow = append(mcvCntsRow, int(v))
|
||||
}
|
||||
valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, mcvsTypes))
|
||||
|
||||
key := keyBuilder.Build(pool)
|
||||
value := valueBuilder.Build(pool)
|
||||
statsMap.Put(ctx, key, value)
|
||||
pos++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
|
||||
for i, v := range r {
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return tb.Build(ns.Pool()), nil
|
||||
}
|
||||
|
||||
func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
|
||||
tup := []byte(s)
|
||||
r := make(sql.Row, tb.Desc.Count())
|
||||
var err error
|
||||
for i, _ := range r {
|
||||
r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
@@ -1,351 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
)
|
||||
|
||||
const (
	// boostrapRowLimit caps the cumulative row count scanned when
	// bootstrapping statistics for a database (see
	// BootstrapDatabaseStats); larger databases must be analyzed
	// manually. (Identifier's "boostrap" spelling kept as-is — it may be
	// referenced elsewhere in the package.)
	boostrapRowLimit = 2e6
)
|
||||
|
||||
func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return p.RefreshTableStatsWithBranch(ctx, table, db, branch)
|
||||
}
|
||||
|
||||
func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branches := p.getStatsBranches(ctx)
|
||||
var rows uint64
|
||||
for _, branch := range branches {
|
||||
sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
|
||||
if err != nil {
|
||||
if sql.ErrDatabaseNotFound.Is(err) {
|
||||
// default branch is not valid
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
tables, err := sqlDb.GetTableNames(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, table := range tables {
|
||||
sqlTable, _, err := GetLatestTable(ctx, table, sqlDb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if st, ok := sqlTable.(sql.StatisticsTable); ok {
|
||||
cnt, ok, err := st.RowCount(ctx)
|
||||
if ok && err == nil {
|
||||
rows += cnt
|
||||
}
|
||||
}
|
||||
if rows >= boostrapRowLimit {
|
||||
return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE <table>\" or \"call dolt_stats_restart()\" to collect statistics", db)
|
||||
}
|
||||
|
||||
if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RefreshTableStatsWithBranch recollects statistics for table in db on
// branch. It holds the table's update lock for the duration,
// lazily initializes the stats database if missing, invalidates all
// stats for the table when its schema hash changed, then merges newly
// scanned buckets with reusable preexisting chunks and flushes.
func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error {
	if !p.TryLockForUpdate(branch, db, table.Name()) {
		return fmt.Errorf("already updating statistics")
	}
	defer p.UnlockTable(branch, db, table.Name())

	dSess := dsess.DSessFromSess(ctx.Session)

	sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
	if err != nil {
		return err
	}

	// lock only after accessing DatabaseProvider

	tableName := strings.ToLower(table.Name())
	dbName := strings.ToLower(db)
	var schemaName string
	if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
		schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
	}

	// tables without addressable indexes have nothing to collect
	iat, ok := table.(sql.IndexAddressableTable)
	if !ok {
		return nil
	}
	indexes, err := iat.GetIndexes(ctx)
	if err != nil {
		return err
	}

	// it's important to update WORKING session references every call
	sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb)
	if err != nil {
		return err
	}

	statDb, ok := p.getStatDb(dbName)
	if !ok {
		// if the stats database does not exist, initialize one
		fs, err := p.pro.FileSystemForDatabase(dbName)
		if err != nil {
			return err
		}
		sourceDb, ok := p.pro.BaseDatabase(ctx, dbName)
		if !ok {
			return sql.ErrDatabaseNotFound.New(dbName)
		}
		statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir)
		if err != nil {
			// init failure downgrades to a warning; refresh is best-effort
			ctx.Warn(0, "%s", err.Error())
			return nil
		}
		p.setStatDb(dbName, statDb)
	}

	schHash, err := dTab.GetSchemaHash(ctx)
	if err != nil {
		return err
	}

	// a changed schema hash invalidates every previously collected
	// statistic for the table; an empty stored hash means first contact
	if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() {
		if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
			return fmt.Errorf("set schema hash error: %w", err)
		}
	} else if oldSchHash != schHash {
		ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
		if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
			return err
		}

		stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, tableName)
		if err != nil {
			return err
		}
		for _, stat := range stats {
			statDb.DeleteStats(ctx, branch, stat.Qualifier())
		}
	} else if err != nil {
		return err
	}

	// build per-index metadata describing which existing chunks remain valid
	tablePrefix := fmt.Sprintf("%s.", tableName)
	var idxMetas []indexMeta
	for _, idx := range indexes {
		cols := make([]string, len(idx.Expressions()))
		for i, c := range idx.Expressions() {
			// index expressions are table-qualified; strip the prefix
			cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
		}

		qual := sql.NewStatQualifier(db, schemaName, table.Name(), strings.ToLower(idx.ID()))
		curStat, ok := statDb.GetStat(branch, qual)
		if !ok {
			curStat = NewDoltStats()
			curStat.Statistic.Qual = qual
		}
		idxMeta, ok, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
		if err != nil {
			return err
		}
		if ok {
			idxMetas = append(idxMetas, idxMeta)
		}
	}

	newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
	if err != nil {
		return err
	}

	// merge new chunks with preexisting chunks
	for _, idxMeta := range idxMetas {
		stat, ok := newTableStats[idxMeta.qual]
		if !ok {
			continue
		}
		targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
		if err != nil {
			return err
		}
		if targetChunks == nil {
			// empty table
			continue
		}
		stat.SetChunks(idxMeta.allAddrs)
		stat.Hist = targetChunks
		stat.UpdateActive()
		if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
			return err
		}
	}

	p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
	return statDb.Flush(ctx, branch)
}
|
||||
|
||||
// BranchQualifiedDatabase returns a branch qualified database name,
// e.g. "mydb/main". If the database is already suffixed with this
// branch no duplication is applied.
func BranchQualifiedDatabase(db, branch string) string {
	suffix := "/" + branch
	if strings.HasSuffix(db, suffix) {
		return db
	}
	return db + suffix
}
|
||||
|
||||
// GetLatestTable will get the WORKING root table for the current database/branch
|
||||
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
|
||||
var db sqle.Database
|
||||
switch d := sqlDb.(type) {
|
||||
case sqle.Database:
|
||||
db = d
|
||||
case sqle.ReadReplicaDatabase:
|
||||
db = d.Database
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
|
||||
}
|
||||
sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
|
||||
}
|
||||
|
||||
var dTab *doltdb.Table
|
||||
switch t := sqlTable.(type) {
|
||||
case *sqle.AlterableDoltTable:
|
||||
dTab, err = t.DoltTable.DoltTable(ctx)
|
||||
case *sqle.WritableDoltTable:
|
||||
dTab, err = t.DoltTable.DoltTable(ctx)
|
||||
case *sqle.DoltTable:
|
||||
dTab, err = t.DoltTable(ctx)
|
||||
default:
|
||||
err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return sqlTable, dTab, nil
|
||||
}
|
||||
|
||||
// newIdxMeta diffs curStats's existing histogram chunks against the
// current target level of sqlIndex's prolly tree, partitioning the
// tree's chunks into reusable buckets and missing ranges that need a
// fresh scan. The returned bool is false when the index is not backed
// by a prolly map and stats cannot be collected.
func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, bool, error) {
	var idx durable.Index
	var err error
	// the clustered index is stored as row data; others by index name
	if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
		idx, err = doltTable.GetRowData(ctx)
	} else {
		idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
	}
	if err != nil {
		return indexMeta{}, false, err
	}

	prollyMap, ok := durable.MaybeProllyMapFromIndex(idx)
	if !ok {
		return indexMeta{}, false, nil
	}

	// an empty index has no chunks to diff
	if cnt, err := prollyMap.Count(); err != nil {
		return indexMeta{}, false, err
	} else if cnt == 0 {
		return indexMeta{
			qual: curStats.Statistic.Qual,
			cols: cols,
		}, true, nil
	}

	// get newest histogram target level hashes
	levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
	if err != nil {
		return indexMeta{}, false, err
	}

	var addrs []hash.Hash
	var keepChunks []sql.HistogramBucket
	var missingAddrs float64
	var missingChunks []tree.Node
	var missingOffsets []updateOrdinal
	var offset uint64

	for _, n := range levelNodes {
		// Compare the previous histogram chunks to the newest tree chunks.
		// Partition the newest chunks into 1) preserved or 2) missing.
		// Missing chunks will need to be scanned on a stats update, so
		// track the (start, end) ordinal offsets to simplify the read iter.
		treeCnt, err := n.TreeCount()
		if err != nil {
			return indexMeta{}, false, err
		}

		addrs = append(addrs, n.HashOf())
		if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok {
			missingChunks = append(missingChunks, n)
			missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
			missingAddrs++
		} else {
			keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
		}
		offset += uint64(treeCnt)
	}

	// previously collected buckets whose chunks no longer appear in
	// keepChunks are scheduled for removal
	var dropChunks []sql.HistogramBucket
	for _, h := range curStats.Chunks {
		var match bool
		for _, b := range keepChunks {
			if DoltBucketChunk(b) == h {
				match = true
				break
			}
		}
		if !match {
			dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
		}
	}

	return indexMeta{
		qual:           curStats.Statistic.Qual,
		cols:           cols,
		newNodes:       missingChunks,
		updateOrdinals: missingOffsets,
		keepChunks:     keepChunks,
		dropChunks:     dropChunks,
		allAddrs:       addrs,
	}, true, nil
}
|
||||
@@ -1,296 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
types2 "github.com/dolthub/go-mysql-server/sql/types"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
)
|
||||
|
||||
const asyncAutoRefreshStats = "async_auto_refresh_stats"
|
||||
|
||||
func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error {
|
||||
_, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
|
||||
_, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
|
||||
interval64, _, _ := types2.Int64.Convert(interval)
|
||||
intervalSec := time.Second * time.Duration(interval64.(int64))
|
||||
thresholdf64 := threshold.(float64)
|
||||
|
||||
ctx, err := ctxFactory(context.Background())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
branches := p.getStatsBranches(ctx)
|
||||
|
||||
return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches)
|
||||
}
|
||||
|
||||
func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error {
|
||||
// this is only called after initial statistics are finished loading
|
||||
// launch a thread that periodically checks freshness
|
||||
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
dropDbCtx, dbStatsCancel := context.WithCancel(context.Background())
|
||||
p.autoCtxCancelers[dbName] = dbStatsCancel
|
||||
|
||||
return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) {
|
||||
ticker := time.NewTicker(checkInterval + time.Nanosecond)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
ticker.Stop()
|
||||
return
|
||||
case <-ticker.C:
|
||||
select {
|
||||
case <-dropDbCtx.Done():
|
||||
ticker.Stop()
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
err := func() error {
|
||||
sqlCtx, err := ctxFactory(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sql.SessionEnd(sqlCtx.Session)
|
||||
sql.SessionCommandBegin(sqlCtx.Session)
|
||||
defer sql.SessionCommandEnd(sqlCtx.Session)
|
||||
|
||||
dSess := dsess.DSessFromSess(sqlCtx.Session)
|
||||
|
||||
ddb, ok := dSess.GetDoltDB(sqlCtx, dbName)
|
||||
if !ok {
|
||||
sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName)
|
||||
return errors.New("database not found")
|
||||
}
|
||||
for _, branch := range branches {
|
||||
if br, ok, err := ddb.HasBranch(sqlCtx, branch); ok {
|
||||
sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String())
|
||||
// update WORKING session references
|
||||
sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch))
|
||||
if err != nil {
|
||||
sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil {
|
||||
sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
} else if err != nil {
|
||||
sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error())
|
||||
} else {
|
||||
sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error {
|
||||
if !p.TryLockForUpdate(branch, dbName, "") {
|
||||
return fmt.Errorf("database already being updated: %s/%s", branch, dbName)
|
||||
}
|
||||
defer p.UnlockTable(branch, dbName, "")
|
||||
|
||||
// Iterate all dbs, tables, indexes. Each db will collect
|
||||
// []indexMeta above refresh threshold. We read and process those
|
||||
// chunks' statistics. We merge updated chunks with precomputed
|
||||
// chunks. The full set of statistics for each database lands
|
||||
// 1) in the provider's most recent set of database statistics, and
|
||||
// 2) on disk in the database's statistics ref'd prolly.Map.
|
||||
statDb, ok := p.getStatDb(dbName)
|
||||
if !ok {
|
||||
return sql.ErrDatabaseNotFound.New(dbName)
|
||||
}
|
||||
|
||||
var deletedStats []sql.StatQualifier
|
||||
qualExists := make(map[sql.StatQualifier]bool)
|
||||
tableExistsAndSkipped := make(map[string]bool)
|
||||
|
||||
tables, err := sqlDb.GetTableNames(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, table := range tables {
|
||||
if !p.TryLockForUpdate(branch, dbName, table) {
|
||||
ctx.GetLogger().Debugf("statistics refresh: table is already being updated: %s/%s.%s", branch, dbName, table)
|
||||
return fmt.Errorf("table already being updated: %s", table)
|
||||
}
|
||||
defer p.UnlockTable(branch, dbName, table)
|
||||
|
||||
sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
tableHash, err := dTab.GetRowDataHash(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if statDb.GetTableHash(branch, table) == tableHash {
|
||||
// no data changes since last check
|
||||
tableExistsAndSkipped[table] = true
|
||||
ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash)
|
||||
continue
|
||||
} else {
|
||||
ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash)
|
||||
}
|
||||
|
||||
schHash, err := dTab.GetSchemaHash(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var schemaName string
|
||||
if schTab, ok := sqlTable.(sql.DatabaseSchemaTable); ok {
|
||||
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
|
||||
}
|
||||
|
||||
if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, table); oldSchHash.IsEmpty() {
|
||||
if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if oldSchHash != schHash {
|
||||
ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
|
||||
if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
|
||||
return err
|
||||
}
|
||||
stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, table)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, stat := range stats {
|
||||
statDb.DeleteStats(ctx, branch, stat.Qualifier())
|
||||
}
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
iat, ok := sqlTable.(sql.IndexAddressableTable)
|
||||
if !ok {
|
||||
return fmt.Errorf("table does not support indexes %s", table)
|
||||
}
|
||||
|
||||
indexes, err := iat.GetIndexes(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// collect indexes and ranges to be updated
|
||||
var idxMetas []indexMeta
|
||||
for _, index := range indexes {
|
||||
qual := sql.NewStatQualifier(dbName, schemaName, table, strings.ToLower(index.ID()))
|
||||
qualExists[qual] = true
|
||||
curStat, ok := statDb.GetStat(branch, qual)
|
||||
if !ok {
|
||||
curStat = NewDoltStats()
|
||||
curStat.Statistic.Qual = qual
|
||||
|
||||
cols := make([]string, len(index.Expressions()))
|
||||
tablePrefix := fmt.Sprintf("%s.", table)
|
||||
for i, c := range index.Expressions() {
|
||||
cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
|
||||
}
|
||||
curStat.Statistic.Cols = cols
|
||||
}
|
||||
ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())
|
||||
|
||||
updateMeta, ok, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
|
||||
if err != nil {
|
||||
ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
curCnt := float64(len(curStat.Active))
|
||||
updateCnt := float64(len(updateMeta.newNodes))
|
||||
deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks))
|
||||
ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt))
|
||||
|
||||
if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh {
|
||||
if curCnt == 0 && updateCnt == 0 {
|
||||
continue
|
||||
}
|
||||
ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual)
|
||||
// mark index for updating
|
||||
idxMetas = append(idxMetas, updateMeta)
|
||||
// update latest hash if we haven't already
|
||||
statDb.SetTableHash(branch, table, tableHash)
|
||||
}
|
||||
}
|
||||
|
||||
// get new buckets for index chunks to update
|
||||
newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// merge new chunks with preexisting chunks
|
||||
for _, updateMeta := range idxMetas {
|
||||
stat := newTableStats[updateMeta.qual]
|
||||
if stat != nil {
|
||||
var err error
|
||||
if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok {
|
||||
err = statDb.SetStat(ctx, branch, updateMeta.qual, stat)
|
||||
} else {
|
||||
err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, q := range statDb.ListStatQuals(branch) {
|
||||
// table or index delete leaves hole in stats
|
||||
// this is separate from threshold check
|
||||
if !tableExistsAndSkipped[q.Table()] && !qualExists[q] {
|
||||
// only delete stats we've verified are deleted
|
||||
deletedStats = append(deletedStats, q)
|
||||
}
|
||||
}
|
||||
|
||||
statDb.DeleteStats(ctx, branch, deletedStats...)
|
||||
|
||||
if err := statDb.Flush(ctx, branch); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
+16
-175
@@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Dolthub, Inc.
|
||||
// Copyright 2023-2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -17,19 +17,11 @@ package statspro
|
||||
import (
|
||||
"container/heap"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
@@ -40,156 +32,7 @@ const (
|
||||
mcvCnt = 3
|
||||
)
|
||||
|
||||
// createNewStatsBuckets builds histograms for a list of index statistic metadata.
|
||||
// We only read chunk ranges indicated by |indexMeta.updateOrdinals|. If
|
||||
// the returned buckets are a subset of the index the caller is responsible
|
||||
// for reconciling the difference.
|
||||
func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Table, indexes []sql.Index, idxMetas []indexMeta) (map[sql.StatQualifier]*DoltStats, error) {
|
||||
nameToIdx := make(map[string]sql.Index)
|
||||
for _, idx := range indexes {
|
||||
nameToIdx[strings.ToLower(idx.ID())] = idx
|
||||
}
|
||||
|
||||
ret := make(map[sql.StatQualifier]*DoltStats)
|
||||
|
||||
for _, meta := range idxMetas {
|
||||
sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())]
|
||||
if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() {
|
||||
continue
|
||||
}
|
||||
var idx durable.Index
|
||||
var err error
|
||||
if strings.EqualFold(meta.qual.Index(), "PRIMARY") {
|
||||
idx, err = dTab.GetRowData(ctx)
|
||||
} else {
|
||||
idx, err = dTab.GetIndexRowData(ctx, meta.qual.Index())
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
prollyMap := durable.ProllyMapFromIndex(idx)
|
||||
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
|
||||
|
||||
fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var types []sql.Type
|
||||
for _, cet := range nameToIdx[strings.ToLower(meta.qual.Index())].ColumnExpressionTypes() {
|
||||
types = append(types, cet.Type)
|
||||
}
|
||||
|
||||
if cnt, err := prollyMap.Count(); err != nil {
|
||||
return nil, err
|
||||
} else if cnt == 0 {
|
||||
// table is empty
|
||||
ret[meta.qual] = NewDoltStats()
|
||||
ret[meta.qual].Statistic.Created = time.Now()
|
||||
ret[meta.qual].Statistic.Cols = meta.cols
|
||||
ret[meta.qual].Statistic.Typs = types
|
||||
ret[meta.qual].Statistic.Qual = meta.qual
|
||||
|
||||
ret[meta.qual].Statistic.Fds = fds
|
||||
ret[meta.qual].Statistic.Colset = colSet
|
||||
ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
firstRow, err := firstRowForIndex(ctx, prollyMap, keyBuilder, len(meta.cols))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc())
|
||||
ret[meta.qual] = NewDoltStats()
|
||||
ret[meta.qual].Chunks = meta.allAddrs
|
||||
ret[meta.qual].Statistic.Created = time.Now()
|
||||
ret[meta.qual].Statistic.Cols = meta.cols
|
||||
ret[meta.qual].Statistic.Typs = types
|
||||
ret[meta.qual].Statistic.Qual = meta.qual
|
||||
ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
|
||||
|
||||
var start, stop uint64
|
||||
// read leaf rows for each bucket
|
||||
for i, chunk := range meta.newNodes {
|
||||
// each node is a bucket
|
||||
updater.newBucket()
|
||||
|
||||
// we read exclusive range [node first key, next node first key)
|
||||
start, stop = meta.updateOrdinals[i].start, meta.updateOrdinals[i].stop
|
||||
iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for {
|
||||
// stats key will be a prefix of the index key
|
||||
keyBytes, _, err := iter.Next(ctx)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// build full key
|
||||
for i := range keyBuilder.Desc.Types {
|
||||
keyBuilder.PutRaw(i, keyBytes.GetField(i))
|
||||
}
|
||||
|
||||
updater.add(ctx, keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
|
||||
keyBuilder.Recycle()
|
||||
}
|
||||
|
||||
// finalize the aggregation
|
||||
bucket, err := updater.finalize(ctx, prollyMap.NodeStore())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bucket.Chunk = chunk.HashOf()
|
||||
ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket)
|
||||
}
|
||||
|
||||
ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct)
|
||||
ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount)
|
||||
ret[updater.qual].Statistic.LowerBnd = firstRow
|
||||
ret[updater.qual].Statistic.Fds = fds
|
||||
ret[updater.qual].Statistic.Colset = colSet
|
||||
ret[updater.qual].UpdateActive()
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// MergeNewChunks combines a set of old and new chunks to create
|
||||
// the desired target histogram. Undefined behavior if a |targetHash|
|
||||
// does not exist in either |oldChunks| or |newChunks|.
|
||||
func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) {
|
||||
hashToPos := make(map[hash.Hash]int, len(inputHashes))
|
||||
for i, h := range inputHashes {
|
||||
hashToPos[h] = i
|
||||
}
|
||||
|
||||
var cnt int
|
||||
targetBuckets := make([]sql.HistogramBucket, len(inputHashes))
|
||||
for _, c := range oldChunks {
|
||||
if idx, ok := hashToPos[DoltBucketChunk(c)]; ok {
|
||||
cnt++
|
||||
targetBuckets[idx] = c
|
||||
}
|
||||
}
|
||||
for _, c := range newChunks {
|
||||
if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil {
|
||||
cnt++
|
||||
targetBuckets[idx] = c
|
||||
}
|
||||
}
|
||||
if cnt != len(inputHashes) {
|
||||
return nil, fmt.Errorf("encountered invalid statistic chunks")
|
||||
}
|
||||
return targetBuckets, nil
|
||||
}
|
||||
|
||||
func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder, prefixLen int) (sql.Row, error) {
|
||||
func firstRowForIndex(ctx *sql.Context, idxLen int, prollyMap prolly.Map, keyBuilder *val.TupleBuilder) (sql.Row, error) {
|
||||
if cnt, err := prollyMap.Count(); err != nil {
|
||||
return nil, err
|
||||
} else if cnt == 0 {
|
||||
@@ -211,9 +54,9 @@ func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.Tu
|
||||
keyBuilder.PutRaw(i, keyBytes.GetField(i))
|
||||
}
|
||||
|
||||
firstKey := keyBuilder.BuildPrefixNoRecycle(buffPool, prefixLen)
|
||||
firstRow := make(sql.Row, prefixLen)
|
||||
for i := 0; i < prefixLen; i++ {
|
||||
firstKey := keyBuilder.Build(buffPool)
|
||||
firstRow := make(sql.Row, idxLen)
|
||||
for i := range firstRow {
|
||||
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -269,7 +112,7 @@ func (u *bucketBuilder) newBucket() {
|
||||
|
||||
// finalize converts the current aggregation stats into a histogram bucket,
|
||||
// which includes deserializing most common value tuples into sql.Rows.
|
||||
func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBucket, error) {
|
||||
func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (*stats.Bucket, error) {
|
||||
// update MCV in case we've ended on a run of many identical keys
|
||||
u.updateMcv()
|
||||
|
||||
@@ -279,27 +122,25 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu
|
||||
// convert the MCV tuples into SQL rows (most efficient to only do this once)
|
||||
mcvRows, err := u.mcvs.Values(ctx, u.tupleDesc, ns, u.prefixLen)
|
||||
if err != nil {
|
||||
return DoltBucket{}, err
|
||||
return nil, err
|
||||
}
|
||||
upperBound := make(sql.Row, u.prefixLen)
|
||||
if u.currentKey != nil {
|
||||
for i := 0; i < u.prefixLen; i++ {
|
||||
upperBound[i], err = tree.GetField(ctx, u.tupleDesc, i, u.currentKey, ns)
|
||||
if err != nil {
|
||||
return DoltBucket{}, err
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return DoltBucket{
|
||||
Bucket: &stats.Bucket{
|
||||
RowCnt: uint64(u.count),
|
||||
DistinctCnt: uint64(u.distinct),
|
||||
BoundCnt: uint64(u.currentCnt),
|
||||
McvVals: mcvRows,
|
||||
McvsCnt: u.mcvs.Counts(),
|
||||
BoundVal: upperBound,
|
||||
NullCnt: uint64(u.nulls),
|
||||
},
|
||||
return &stats.Bucket{
|
||||
RowCnt: uint64(u.count),
|
||||
DistinctCnt: uint64(u.distinct),
|
||||
BoundCnt: uint64(u.currentCnt),
|
||||
McvVals: mcvRows,
|
||||
McvsCnt: u.mcvs.Counts(),
|
||||
BoundVal: upperBound,
|
||||
NullCnt: uint64(u.nulls),
|
||||
}, nil
|
||||
}
|
||||
|
||||
+20
-20
@@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Dolthub, Inc.
|
||||
// Copyright 2023-2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -61,27 +61,27 @@ func TestBucketBuilder(t *testing.T) {
|
||||
name string
|
||||
keys []sql.Row
|
||||
keyDesc val.TupleDesc
|
||||
bucket DoltBucket
|
||||
bucket *stats.Bucket
|
||||
}{
|
||||
{
|
||||
name: "ints",
|
||||
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 15,
|
||||
DistinctCnt: 5,
|
||||
McvVals: []sql.Row{},
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{int64(5)},
|
||||
BoundCnt: 2,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
// technically nulls should be at beginning
|
||||
name: "ints with middle nulls",
|
||||
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 16,
|
||||
DistinctCnt: 6,
|
||||
NullCnt: 3,
|
||||
@@ -89,13 +89,13 @@ func TestBucketBuilder(t *testing.T) {
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{int64(5)},
|
||||
BoundCnt: 2,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ints with beginning nulls",
|
||||
keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 15,
|
||||
DistinctCnt: 6,
|
||||
NullCnt: 2,
|
||||
@@ -103,86 +103,86 @@ func TestBucketBuilder(t *testing.T) {
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{int64(5)},
|
||||
BoundCnt: 2,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "more ints",
|
||||
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 22,
|
||||
DistinctCnt: 7,
|
||||
BoundCnt: 1,
|
||||
McvVals: []sql.Row{},
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{int64(7)},
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "2-ints",
|
||||
keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 15,
|
||||
DistinctCnt: 11,
|
||||
McvVals: []sql.Row{{int64(4), int64(1)}},
|
||||
McvsCnt: []uint64{3},
|
||||
BoundVal: sql.Row{int64(5), int64(2)},
|
||||
BoundCnt: 1,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "2-ints with nulls",
|
||||
keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 5,
|
||||
DistinctCnt: 5,
|
||||
NullCnt: 3,
|
||||
McvVals: []sql.Row{},
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{int64(2), int64(2)},
|
||||
BoundCnt: 1},
|
||||
BoundCnt: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "varchars",
|
||||
keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 15,
|
||||
DistinctCnt: 9,
|
||||
McvVals: []sql.Row{},
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{"i"},
|
||||
BoundCnt: 2,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "varchar-ints",
|
||||
keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 15,
|
||||
DistinctCnt: 12,
|
||||
McvVals: []sql.Row{},
|
||||
McvsCnt: []uint64{},
|
||||
BoundVal: sql.Row{"i", int64(1)},
|
||||
BoundCnt: 2,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mcvs",
|
||||
keys: []sql.Row{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {7}, {7}, {7}, {8}, {9}, {10}, {10}, {10}, {11}, {12}, {13}, {14}, {15}, {20}, {21}, {22}},
|
||||
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
|
||||
bucket: DoltBucket{Bucket: &stats.Bucket{
|
||||
bucket: &stats.Bucket{
|
||||
RowCnt: 23,
|
||||
DistinctCnt: 18,
|
||||
McvVals: []sql.Row{{int64(10)}, {int64(7)}},
|
||||
McvsCnt: []uint64{3, 4},
|
||||
BoundVal: sql.Row{int64(22)},
|
||||
BoundCnt: 1,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,161 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
types2 "github.com/dolthub/go-mysql-server/sql/types"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
)
|
||||
|
||||
var helpMsg = "call dolt_stats_purge() to reset statistics"
|
||||
|
||||
func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Context) (*sql.Context, error), bThreads *sql.BackgroundThreads, dbs []dsess.SqlDatabase) error {
|
||||
p.SetStarter(NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
|
||||
|
||||
if _, disabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly); disabled == int8(1) {
|
||||
return nil
|
||||
}
|
||||
|
||||
loadCtx, err := ctxFactory(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sql.SessionEnd(loadCtx.Session)
|
||||
sql.SessionCommandBegin(loadCtx.Session)
|
||||
defer sql.SessionCommandEnd(loadCtx.Session)
|
||||
|
||||
branches := p.getStatsBranches(loadCtx)
|
||||
|
||||
var autoEnabled bool
|
||||
var startupEnabled bool
|
||||
var intervalSec time.Duration
|
||||
var thresholdf64 float64
|
||||
if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) {
|
||||
autoEnabled = true
|
||||
_, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
|
||||
_, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
|
||||
interval64, _, _ := types2.Int64.Convert(interval)
|
||||
intervalSec = time.Second * time.Duration(interval64.(int64))
|
||||
thresholdf64 = threshold.(float64)
|
||||
|
||||
p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
|
||||
p.pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{NewStatsDropDatabaseHook(p)}, p.pro.DropDatabaseHooks...)
|
||||
} else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) {
|
||||
startupEnabled = true
|
||||
}
|
||||
|
||||
eg, ctx := loadCtx.NewErrgroup()
|
||||
for _, db := range dbs {
|
||||
// copy closure variables
|
||||
db := db
|
||||
eg.Go(func() (err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if str, ok := r.(fmt.Stringer); ok {
|
||||
err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String())
|
||||
} else {
|
||||
err = fmt.Errorf("%w: %v", ErrFailedToLoad, r)
|
||||
}
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
fs, err := p.pro.FileSystemForDatabase(db.Name())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if p.Load(loadCtx, fs, db, branches); err != nil {
|
||||
return err
|
||||
}
|
||||
if autoEnabled {
|
||||
return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches)
|
||||
} else if startupEnabled {
|
||||
if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
// getStatsBranches returns the set of branches whose statistics are tracked.
|
||||
// The order of precedence is (1) global variable, (2) session current branch,
|
||||
// (3) engine default branch.
|
||||
func (p *Provider) getStatsBranches(ctx *sql.Context) []string {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
var branches []string
|
||||
if _, bs, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranches); bs == "" {
|
||||
defaultBranch, _ := dSess.GetBranch()
|
||||
if defaultBranch != "" {
|
||||
branches = append(branches, defaultBranch)
|
||||
}
|
||||
} else {
|
||||
for _, branch := range strings.Split(bs.(string), ",") {
|
||||
branches = append(branches, strings.TrimSpace(branch))
|
||||
}
|
||||
}
|
||||
|
||||
if branches == nil {
|
||||
branches = append(branches, p.pro.DefaultBranch())
|
||||
}
|
||||
return branches
|
||||
}
|
||||
|
||||
func (p *Provider) LoadStats(ctx *sql.Context, db, branch string) error {
|
||||
if statDb, ok := p.getStatDb(db); ok {
|
||||
return statDb.LoadBranchStats(ctx, branch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Load scans the statistics tables, populating the |stats| attribute.
|
||||
// Statistics are not available for reading until we've finished loading.
|
||||
func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDatabase, branches []string) {
|
||||
// |statPath| is either file://./stat or mem://stat
|
||||
statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir)
|
||||
if err != nil {
|
||||
ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
|
||||
return
|
||||
}
|
||||
|
||||
for _, branch := range branches {
|
||||
if err = statsDb.LoadBranchStats(ctx, branch); err != nil {
|
||||
// if branch name is invalid, continue loading rest
|
||||
// TODO: differentiate bad branch name from other errors
|
||||
ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
|
||||
continue
|
||||
}
|
||||
if err := statsDb.Flush(ctx, branch); err != nil {
|
||||
ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
p.setStatDb(strings.ToLower(db.Name()), statsDb)
|
||||
return
|
||||
}
|
||||
@@ -0,0 +1,630 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/dolthub/dolt/go/cmd/dolt/doltversion"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro/jobqueue"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/earl"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
var _ sql.StatsProvider = (*StatsController)(nil)
|
||||
|
||||
type ctxFactory func(ctx context.Context) (*sql.Context, error)
|
||||
|
||||
// tableIndexesKey identifies one table's statistics: database, branch,
// table name, and (for Doltgres) an optional schema.
type tableIndexesKey struct {
	db     string
	branch string
	table  string
	schema string
}

// String returns a path-like identifier for the key. The schema segment
// is included only when a schema is set; otherwise the key renders as
// db/branch/table.
//
// Fix: the previous guard tested k.table but the branch it guarded
// prepended k.schema, so empty-schema keys rendered with a leading "/"
// and the db/branch/table form was unreachable for non-empty tables.
func (k tableIndexesKey) String() string {
	if k.schema != "" {
		return k.schema + "/" + k.db + "/" + k.branch + "/" + k.table
	}
	return k.db + "/" + k.branch + "/" + k.table
}
|
||||
|
||||
// StatsController owns the single-threaded stats event loop: it
// schedules collection jobs, caches histogram objects, and exposes
// statistics to sessions via the sql.StatsProvider interface.
type StatsController struct {
	logger *logrus.Logger
	pro    *sqle.DoltDatabaseProvider
	// bgThreads runs the controller's worker goroutines.
	bgThreads *sql.BackgroundThreads
	// statsBackingDb is the filesystem hosting on-disk stats storage;
	// nil when stats are memory-only.
	statsBackingDb filesys.Filesys
	hdpEnv         *env.DoltEnv

	// dbFs maps database aliased name -> its backing filesystem;
	// candidates for hosting the stats storage target.
	dbFs map[string]filesys.Filesys

	// ctxGen lets us fetch the most recent working root
	ctxGen ctxFactory

	// sq serializes collection jobs onto a single worker thread.
	sq *jobqueue.SerialQueue

	// activeCtxCancel cancels the currently running update cycle, if any.
	activeCtxCancel context.CancelFunc
	listeners       []listener

	JobInterval time.Duration
	gcInterval  time.Duration
	// memOnly disables on-disk persistence entirely.
	memOnly  bool
	enableGc bool
	// doGc flags the next cycle to perform a GC pass.
	doGc   bool
	Debug  bool
	closed chan struct{}

	// kv is a content-addressed cache of histogram objects:
	// buckets, first bounds, and schema-specific statistic
	// templates.
	kv StatsKv
	// Stats tracks table statistics accessible to sessions.
	Stats *rootStats
	// mu protects all shared object access
	mu sync.Mutex
	// genCnt is used to atomically swap Stats, same behavior
	// as last-writer wins
	genCnt atomic.Uint64
	gcCnt  int
}
|
||||
|
||||
// rootStats is one generation's snapshot of table statistics plus
// counters describing how the snapshot was produced.
type rootStats struct {
	// hashes records the last-seen index fanout hash per table key,
	// used to detect when a table's stats are stale.
	hashes map[tableIndexesKey]hash.Hash
	// stats maps a table key to the statistics for each of its indexes.
	stats map[tableIndexesKey][]*stats.Statistic
	// exported counters are serialized by String() for debugging.
	DbCnt           int `json:"dbCnt"`
	BucketWrites    int `json:"bucketWrites"`
	TablesProcessed int `json:"tablesProcessed"`
	TablesSkipped   int `json:"tablesSkipped"`
}
|
||||
|
||||
func newRootStats() *rootStats {
|
||||
return &rootStats{
|
||||
hashes: make(map[tableIndexesKey]hash.Hash),
|
||||
stats: make(map[tableIndexesKey][]*stats.Statistic),
|
||||
}
|
||||
}
|
||||
|
||||
func (rs *rootStats) String() string {
|
||||
str, _ := json.Marshal(rs)
|
||||
return string(str)
|
||||
}
|
||||
|
||||
func NewStatsController(logger *logrus.Logger, dEnv *env.DoltEnv) *StatsController {
|
||||
sq := jobqueue.NewSerialQueue().WithErrorCb(func(err error) {
|
||||
logger.Error(err)
|
||||
})
|
||||
|
||||
return &StatsController{
|
||||
mu: sync.Mutex{},
|
||||
logger: logger,
|
||||
JobInterval: 500 * time.Millisecond,
|
||||
gcInterval: 24 * time.Hour,
|
||||
sq: sq,
|
||||
Stats: newRootStats(),
|
||||
dbFs: make(map[string]filesys.Filesys),
|
||||
closed: make(chan struct{}),
|
||||
kv: NewMemStats(),
|
||||
hdpEnv: dEnv,
|
||||
genCnt: atomic.Uint64{},
|
||||
}
|
||||
}
|
||||
|
||||
// SetBackgroundThreads wires the server's background thread manager into
// the controller.
// NOTE(review): unlike the other setters this write is not guarded by
// sc.mu — presumably only called during startup before the loop runs;
// confirm against callers.
func (sc *StatsController) SetBackgroundThreads(bgThreads *sql.BackgroundThreads) {
	sc.bgThreads = bgThreads
}
|
||||
|
||||
func (sc *StatsController) SetMemOnly(v bool) {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
sc.memOnly = v
|
||||
}
|
||||
|
||||
func (sc *StatsController) SetEnableGc(v bool) {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
sc.enableGc = v
|
||||
}
|
||||
|
||||
func (sc *StatsController) setDoGc(force bool) {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.enableGc || force {
|
||||
sc.doGc = true
|
||||
}
|
||||
}
|
||||
|
||||
func (sc *StatsController) gcIsSet() bool {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
return sc.doGc
|
||||
}
|
||||
|
||||
// SetTimers can only be called after Init
// |job| and |gc| are converted directly via time.Duration, so callers
// supply nanosecond counts; |job| is floored at 1 to keep the rate
// limiter interval valid.
func (sc *StatsController) SetTimers(job, gc int64) {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	sc.sq.NewRateLimit(time.Duration(max(1, job)))
	sc.gcInterval = time.Duration(gc)
}
|
||||
|
||||
// AddFs registers |fs| as the filesystem backing |db|, making it a
// candidate host for on-disk stats storage. When this is the first
// registered database and |rotateOk| is set, storage is rotated so the
// new database becomes the backing target.
func (sc *StatsController) AddFs(ctx *sql.Context, db dsess.SqlDatabase, fs filesys.Filesys, rotateOk bool) error {
	sc.mu.Lock()
	defer sc.mu.Unlock()

	firstDb := len(sc.dbFs) == 0
	sc.dbFs[db.AliasedName()] = fs
	if rotateOk && firstDb {
		return sc.lockedRotateStorage(ctx)
	}
	return nil
}
|
||||
|
||||
// Info summarizes controller state for the dolt_stats_info() procedure.
// The KV cache is flushed first so the reported storage bucket count is
// current.
func (sc *StatsController) Info(ctx context.Context) (dprocedures.StatsInfo, error) {
	sc.mu.Lock()
	defer sc.mu.Unlock()

	// don't use protected access / deadlock
	cachedBucketCnt := sc.kv.Len()
	storageCnt, err := sc.kv.Flush(ctx)
	if err != nil {
		return dprocedures.StatsInfo{}, err
	}

	var cachedBoundCnt int
	var cachedTemplateCnt int
	var backing string
	// Bound/template counts live on the memStats layer regardless of
	// whether the KV is memory-only or prolly-backed.
	switch kv := sc.kv.(type) {
	case *memStats:
		cachedBoundCnt = len(kv.bounds)
		cachedTemplateCnt = len(kv.templates)
		backing = "memory"
	case *prollyStats:
		cachedBoundCnt = len(kv.mem.bounds)
		cachedTemplateCnt = len(kv.mem.templates)
		// best-effort; an Abs failure leaves |backing| empty
		backing, _ = sc.statsBackingDb.Abs("")
	}
	return dprocedures.StatsInfo{
		DbCnt:             sc.Stats.DbCnt,
		Active:            sc.activeCtxCancel != nil,
		CachedBucketCnt:   cachedBucketCnt,
		StorageBucketCnt:  storageCnt,
		CachedBoundCnt:    cachedBoundCnt,
		CachedTemplateCnt: cachedTemplateCnt,
		StatCnt:           len(sc.Stats.stats),
		GenCnt:            int(sc.genCnt.Load()),
		GcCnt:             sc.gcCnt,
		Backing:           filepath.Base(backing),
	}, nil
}
|
||||
|
||||
func (sc *StatsController) descError(d string, err error) {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
if sc.Debug {
|
||||
log.Println("stats error: ", err.Error())
|
||||
}
|
||||
b := strings.Builder{}
|
||||
b.WriteString("stats error;")
|
||||
if d != "" {
|
||||
b.WriteString("; " + d)
|
||||
}
|
||||
if err != nil {
|
||||
b.WriteString("; " + err.Error())
|
||||
}
|
||||
sc.logger.Debug(b.String())
|
||||
}
|
||||
|
||||
func (sc *StatsController) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
|
||||
key, err := sc.statsKey(ctx, db, table.Name())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.Stats == nil {
|
||||
return nil, nil
|
||||
}
|
||||
st := sc.Stats.stats[key]
|
||||
var ret []sql.Statistic
|
||||
for _, s := range st {
|
||||
ret = append(ret, s)
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) AnalyzeTable(ctx *sql.Context, table sql.Table, dbName string) (err error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
|
||||
var branch string
|
||||
if strings.Contains(dbName, "/") {
|
||||
parts := strings.Split(dbName, "/")
|
||||
if len(parts) == 2 {
|
||||
dbName = parts[0]
|
||||
branch = parts[1]
|
||||
}
|
||||
}
|
||||
if branch == "" {
|
||||
var err error
|
||||
branch, err = dSess.GetBranch()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if branch == "" {
|
||||
branch = env.DefaultInitBranch
|
||||
}
|
||||
}
|
||||
|
||||
db, err := sc.pro.Database(ctx, dbName)
|
||||
sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), branch, branch+"/"+dbName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
newStats := newRootStats()
|
||||
err = sc.updateTable(ctx, newStats, table.Name(), sqlDb, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sc.mu.Lock()
|
||||
for k, v := range newStats.stats {
|
||||
sc.Stats.stats[k] = v
|
||||
sc.Stats.hashes[k] = newStats.hashes[k]
|
||||
}
|
||||
sc.mu.Unlock()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (sc *StatsController) SetStats(ctx *sql.Context, s sql.Statistic) error {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
ss, ok := s.(*stats.Statistic)
|
||||
if !ok {
|
||||
return fmt.Errorf("expected *stats.Statistics, found %T", s)
|
||||
}
|
||||
key, err := sc.statsKey(ctx, ss.Qualifier().Db(), ss.Qualifier().Table())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// not efficient, but this is only used for testing
|
||||
var newStats []*stats.Statistic
|
||||
for _, ss := range sc.Stats.stats[key] {
|
||||
if !strings.EqualFold(ss.Qualifier().Index(), s.Qualifier().Index()) {
|
||||
newStats = append(newStats, ss)
|
||||
}
|
||||
}
|
||||
newStats = append(newStats, ss)
|
||||
sc.Stats.stats[key] = newStats
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
key, err := sc.statsKey(ctx, qual.Database, qual.Table())
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
for _, s := range sc.Stats.stats[key] {
|
||||
if strings.EqualFold(s.Qualifier().Index(), qual.Index()) {
|
||||
return s, true
|
||||
}
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (sc *StatsController) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) {
|
||||
key := tableIndexesKey{
|
||||
db: strings.ToLower(db),
|
||||
branch: strings.ToLower(branch),
|
||||
table: strings.ToLower(table),
|
||||
schema: strings.ToLower(schema),
|
||||
}
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.Stats == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return sc.Stats.stats[key], nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
|
||||
key, err := sc.statsKey(ctx, qual.Database, qual.Table())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
delete(sc.Stats.stats, key)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DropDbStats removes all cached statistics for |dbName|. If the dropped
// database hosted the on-disk storage target, the controller restarts
// and rotates storage to another registered database first.
// NOTE(review): |flush| is not referenced in this body — confirm it is
// intentionally unused.
func (sc *StatsController) DropDbStats(ctx *sql.Context, dbName string, flush bool) error {
	sc.mu.Lock()
	defer sc.mu.Unlock()

	dbFs := sc.dbFs[dbName]
	delete(sc.dbFs, dbName)
	if sc.statsBackingDb == dbFs {
		// don't wait to see if the thread context is invalidated
		// The lock is released only around Restart so it cannot
		// self-deadlock; it is re-acquired before rotating storage.
		func() {
			sc.mu.Unlock()
			sc.Restart()
			defer sc.mu.Lock()
		}()
		if err := sc.lockedRotateStorage(ctx); err != nil {
			return err
		}
	}

	// Collect keys first; deleting while ranging a map is avoided.
	var deleteKeys []tableIndexesKey
	for k, _ := range sc.Stats.stats {
		if strings.EqualFold(dbName, k.db) {
			deleteKeys = append(deleteKeys, k)
		}
	}
	for _, k := range deleteKeys {
		delete(sc.Stats.stats, k)
	}
	return nil
}
|
||||
|
||||
func (sc *StatsController) statsKey(ctx *sql.Context, dbName, table string) (tableIndexesKey, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return tableIndexesKey{}, err
|
||||
}
|
||||
key := tableIndexesKey{
|
||||
db: strings.ToLower(dbName),
|
||||
branch: strings.ToLower(branch),
|
||||
table: strings.ToLower(table),
|
||||
}
|
||||
return key, nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) RowCount(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
|
||||
key, err := sc.statsKey(ctx, dbName, table.Name())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
for _, s := range sc.Stats.stats[key] {
|
||||
if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
|
||||
return s.RowCnt, nil
|
||||
}
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// DataLength reports a size estimate for |table|.
// NOTE(review): this returns the primary-index row count, identical to
// RowCount, rather than a byte length — confirm whether that is an
// intentional proxy or should multiply by average row size.
func (sc *StatsController) DataLength(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
	key, err := sc.statsKey(ctx, dbName, table.Name())
	if err != nil {
		return 0, err
	}
	sc.mu.Lock()
	defer sc.mu.Unlock()
	for _, s := range sc.Stats.stats[key] {
		if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
			return s.RowCnt, nil
		}
	}
	return 0, nil
}
|
||||
|
||||
func (sc *StatsController) Purge(ctx *sql.Context) error {
|
||||
genStart := sc.genCnt.Load()
|
||||
newKv := NewMemStats()
|
||||
newKv.gcGen = genStart
|
||||
newStats := newRootStats()
|
||||
if ok, err := sc.trySwapStats(ctx, genStart, newStats, newKv); !ok {
|
||||
return fmt.Errorf("failed to purge stats")
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) rotateStorage(ctx context.Context) error {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
return sc.lockedRotateStorage(ctx)
|
||||
}
|
||||
|
||||
// lockedRotateStorage re-targets on-disk stats storage. Callers must
// hold sc.mu. The current backing database (if any) is deleted, the
// in-memory cache is carried forward, and a fresh prolly-backed store is
// created under one of the registered database filesystems. In memOnly
// mode this is a no-op.
func (sc *StatsController) lockedRotateStorage(ctx context.Context) error {
	if sc.memOnly {
		return nil
	}
	if sc.statsBackingDb != nil {
		if err := sc.rm(sc.statsBackingDb); err != nil {
			return err
		}
	}

	// Preserve the in-memory layer regardless of the current KV's
	// concrete type.
	var mem *memStats
	switch kv := sc.kv.(type) {
	case *prollyStats:
		mem = kv.mem
	case *memStats:
		mem = kv
	default:
		mem = NewMemStats()
	}

	// No registered databases can host storage: fall back to memory.
	if len(sc.dbFs) == 0 {
		sc.kv = mem
		sc.statsBackingDb = nil
		return nil
	}

	var newStorageTarget filesys.Filesys
	for _, dbFs := range sc.dbFs {
		newStorageTarget = dbFs
		if newStorageTarget == sc.statsBackingDb {
			// prefer continuity when possible
			break
		}
	}

	// Clear any stale stats directory on the chosen target.
	if err := sc.rm(newStorageTarget); err != nil {
		return err
	}

	sqlCtx, err := sc.ctxGen(ctx)
	if err != nil {
		return err
	}
	defer sql.SessionEnd(sqlCtx.Session)
	sql.SessionCommandBegin(sqlCtx.Session)
	defer sql.SessionCommandEnd(sqlCtx.Session)

	newKv, err := sc.initStorage(sqlCtx, newStorageTarget)
	if err != nil {
		return err
	}

	// Swap in the new store, keeping the carried-forward memory cache.
	newKv.mem = mem
	sc.kv = newKv
	sc.statsBackingDb = newStorageTarget
	return nil
}
|
||||
|
||||
func (sc *StatsController) rm(fs filesys.Filesys) error {
|
||||
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if ok, _ := statsFs.Exists(""); ok {
|
||||
if err := statsFs.Delete("", true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
dropDbLoc, err := statsFs.Abs("")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//log.Println("rm", dropDbLoc)
|
||||
|
||||
if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) initStorage(ctx context.Context, fs filesys.Filesys) (*prollyStats, error) {
|
||||
if sc.hdpEnv == nil {
|
||||
return nil, fmt.Errorf("cannot initialize *prollKv, missing homeDirProvider")
|
||||
}
|
||||
params := make(map[string]interface{})
|
||||
params[dbfactory.GRPCDialProviderParam] = env.NewGRPCDialProviderFromDoltEnv(sc.hdpEnv)
|
||||
|
||||
var urlPath string
|
||||
u, err := earl.Parse(sc.pro.DbFactoryUrl())
|
||||
if u.Scheme == dbfactory.MemScheme {
|
||||
urlPath = path.Join(sc.pro.DbFactoryUrl(), dbfactory.DoltDataDir)
|
||||
} else if u.Scheme == dbfactory.FileScheme {
|
||||
urlPath = doltdb.LocalDirDoltDB
|
||||
}
|
||||
|
||||
statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var dEnv *env.DoltEnv
|
||||
exists, isDir := statsFs.Exists("")
|
||||
if !exists {
|
||||
err := statsFs.MkDirs("")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
|
||||
}
|
||||
|
||||
dEnv = env.Load(ctx, sc.hdpEnv.GetUserHomeDir, statsFs, urlPath, "test")
|
||||
err = dEnv.InitRepo(ctx, types.Format_Default, "stats", "stats@stats.com", env.DefaultInitBranch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if !isDir {
|
||||
return nil, fmt.Errorf("file exists where the dolt stats directory should be")
|
||||
} else {
|
||||
dEnv = env.LoadWithoutDB(ctx, sc.hdpEnv.GetUserHomeDir, statsFs, "", doltversion.Version)
|
||||
}
|
||||
|
||||
if err := dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
deaf := dEnv.DbEaFactory(ctx)
|
||||
|
||||
tmpDir, err := dEnv.TempTableFilesDir()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
opts := editor.Options{
|
||||
Deaf: deaf,
|
||||
Tempdir: tmpDir,
|
||||
}
|
||||
|
||||
statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m, err := dEnv.DbData(ctx).Ddb.GetStatistics(ctx)
|
||||
if err == nil {
|
||||
// use preexisting map
|
||||
kd, vd := m.Descriptors()
|
||||
return &prollyStats{
|
||||
mu: sync.Mutex{},
|
||||
destDb: statsDb,
|
||||
kb: val.NewTupleBuilder(kd),
|
||||
vb: val.NewTupleBuilder(vd),
|
||||
m: m.Mutate(),
|
||||
mem: NewMemStats(),
|
||||
}, nil
|
||||
}
|
||||
return NewProllyStats(ctx, statsDb)
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
// Package statspro provides a queue that manages table statistics
|
||||
// management and access.
|
||||
//
|
||||
// At any given time there is one work generating thread, one scheduling
|
||||
// thread, and one execution thread.
|
||||
//
|
||||
// The worker loop fetches the most recent session root,
|
||||
// reads all of its databases/tables/ indexes, collects statistics
|
||||
// for those objects, and updates the shared statistics state. Every
|
||||
// cycle replaces the shared state.
|
||||
//
|
||||
// Work is delegated to the scheduler thread, which serializes
|
||||
// issuer jobs with concurrent async requests, and rate limits sending
|
||||
// jobs to the execution thread. The execution thread completes
|
||||
// function callbacks.
|
||||
//
|
||||
// GC occurs within an update cycle. Through a cycle GC populates an
|
||||
// in-memory cache with the complete and exclusive set of values of
|
||||
// the new shared statistics object. Both are atomically swapped using
|
||||
// a generation counter (which may or may not be necessary, but is one
|
||||
// of several guards against surprising concurrent changes).
|
||||
//
|
||||
// Concurrent issuer threads are further restrained with a context list
|
||||
// that at most one thread owns. There are two contexts, one for the
|
||||
// thread and another for the specific update cycle. Listeners (like wait)
|
||||
// use the second context to follow update cycles. Concurrent restarts
|
||||
// cancel and replace the previous owner's contexts with their own. Atomic
|
||||
// shared state swaps are likewise guarded on the issuer's context
|
||||
// integrity.
|
||||
//
|
||||
// All stats are persisted within a single database in the `.dolt/stats`
|
||||
// folder separate from user data. If there are multiple databases,
// one is selected at random as the storage target. If during
// initialization multiple databases have stats, one will be chosen
// at random as the target. If a database changes between server
|
||||
// restarts, the storage stats will be useless but not impair regular
|
||||
// operations because storage is only ever a best-effort
|
||||
// content-addressed persistence layer; buckets will be regenerated if
|
||||
// they are missing. If the database acting as a storage target is
|
||||
// deleted, we swap the cache and write to a new storage target.
|
||||
//
|
||||
// The main data structures:
|
||||
// - Table statistics map, that returns a list of table index statistics
|
||||
// for a specific branch, database, and table name.
|
||||
// - Object caches:
|
||||
// - Bucket cache: Chunk addressed hash map. All provider histogram
|
||||
// references point to objects in the bucket cache. Backed by a
|
||||
// best-effort on-disk prolly.Map to make restarts faster.
|
||||
// - Template cache: Table-schema/index addressed stats.Statistics object
|
||||
// for a specific index.
|
||||
// - Bound cache: Chunk addressed first row for an index histogram.
|
||||
//
|
||||
// The stats lifecycle can be controlled with:
|
||||
// - dolt_stats_stop: clear queue and disable thread
|
||||
// - dolt_stats_restart: clear queue, refresh queue, start thread
|
||||
// - dolt_stats_purge: clear queue, refresh queue, clear cache,
|
||||
// disable thread
|
||||
// - dolt_stats_once: collect statistics once, ex: in sql-shell
|
||||
// - dolt_stats_wait: block on a full queue cycle
|
||||
// - dolt_stats_gc: block waiting for a GC signal
|
||||
// - dolt_stats_flush: block waiting for a flush signal
|
||||
//
|
||||
@@ -1,290 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// DoltStats wraps a *stats.Statistic with the chunk addresses and
// histogram buckets backing it, implementing sql.Statistic.
type DoltStats struct {
	Statistic *stats.Statistic
	// mu guards Chunks, Active, and Hist across concurrent readers.
	mu *sync.Mutex
	// Chunks is a list of addresses for the histogram fanout level
	Chunks []hash.Hash
	// Active maps a chunk/bucket address to its position in
	// the histogram. 1-indexed to differentiate from an empty
	// field on disk
	Active map[hash.Hash]int
	// Hist holds the materialized histogram buckets.
	Hist sql.Histogram
	// Tb builds key tuples for this statistic's index.
	Tb *val.TupleBuilder
}
|
||||
|
||||
func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper {
|
||||
return s
|
||||
}
|
||||
|
||||
var _ sql.Statistic = (*DoltStats)(nil)
|
||||
|
||||
func (s *DoltStats) SetChunks(h []hash.Hash) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.Chunks = h
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithRowCount(u uint64) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithNullCount(u uint64) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic {
|
||||
ret := *s
|
||||
ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic)
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s *DoltStats) RowCount() uint64 {
|
||||
return s.Statistic.RowCount()
|
||||
}
|
||||
|
||||
func (s *DoltStats) DistinctCount() uint64 {
|
||||
return s.Statistic.DistinctCount()
|
||||
}
|
||||
|
||||
func (s *DoltStats) NullCount() uint64 {
|
||||
return s.Statistic.NullCount()
|
||||
|
||||
}
|
||||
|
||||
func (s *DoltStats) AvgSize() uint64 {
|
||||
return s.Statistic.AvgSize()
|
||||
|
||||
}
|
||||
|
||||
func (s *DoltStats) CreatedAt() time.Time {
|
||||
return s.Statistic.CreatedAt()
|
||||
|
||||
}
|
||||
|
||||
func (s *DoltStats) Columns() []string {
|
||||
return s.Statistic.Columns()
|
||||
}
|
||||
|
||||
func (s *DoltStats) Types() []sql.Type {
|
||||
return s.Statistic.Types()
|
||||
}
|
||||
|
||||
func (s *DoltStats) Qualifier() sql.StatQualifier {
|
||||
return s.Statistic.Qualifier()
|
||||
}
|
||||
|
||||
func (s *DoltStats) IndexClass() sql.IndexClass {
|
||||
return s.Statistic.IndexClass()
|
||||
}
|
||||
|
||||
func (s *DoltStats) FuncDeps() *sql.FuncDepSet {
|
||||
return s.Statistic.FuncDeps()
|
||||
}
|
||||
|
||||
func (s *DoltStats) ColSet() sql.ColSet {
|
||||
return s.Statistic.ColSet()
|
||||
}
|
||||
|
||||
func (s *DoltStats) LowerBound() sql.Row {
|
||||
return s.Statistic.LowerBound()
|
||||
}
|
||||
|
||||
func NewDoltStats() *DoltStats {
|
||||
return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
|
||||
}
|
||||
|
||||
func (s *DoltStats) ToInterface() (interface{}, error) {
|
||||
statVal, err := s.Statistic.ToInterface()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret := statVal.(map[string]interface{})
|
||||
|
||||
var hist sql.Histogram
|
||||
for _, b := range s.Hist {
|
||||
hist = append(hist, b)
|
||||
}
|
||||
histVal, err := hist.ToInterface()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret["statistic"].(map[string]interface{})["buckets"] = histVal
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
ret := *s
|
||||
ret.Hist = nil
|
||||
for _, b := range h {
|
||||
doltB, ok := b.(DoltBucket)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid bucket type: %T, %s", b, h.DebugString())
|
||||
}
|
||||
ret.Hist = append(ret.Hist, doltB)
|
||||
}
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s *DoltStats) Histogram() sql.Histogram {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.Hist
|
||||
}
|
||||
|
||||
func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
|
||||
hist, err := DoltHistFromSql(stat.Histogram(), stat.Types())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret := &DoltStats{
|
||||
mu: &sync.Mutex{},
|
||||
Hist: hist,
|
||||
Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()),
|
||||
Active: make(map[hash.Hash]int),
|
||||
}
|
||||
ret.Statistic.Fds = stat.FuncDeps()
|
||||
ret.Statistic.Colset = stat.ColSet()
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s *DoltStats) UpdateActive() {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
newActive := make(map[hash.Hash]int)
|
||||
for i, hash := range s.Chunks {
|
||||
newActive[hash] = i
|
||||
}
|
||||
s.Active = newActive
|
||||
}
|
||||
|
||||
type DoltHistogram []DoltBucket
|
||||
|
||||
type DoltBucket struct {
|
||||
Bucket *stats.Bucket
|
||||
Chunk hash.Hash
|
||||
Created time.Time
|
||||
}
|
||||
|
||||
func (d DoltBucket) RowCount() uint64 {
|
||||
return d.Bucket.RowCount()
|
||||
}
|
||||
|
||||
func (d DoltBucket) DistinctCount() uint64 {
|
||||
return d.Bucket.DistinctCount()
|
||||
}
|
||||
|
||||
func (d DoltBucket) NullCount() uint64 {
|
||||
return d.Bucket.NullCount()
|
||||
}
|
||||
|
||||
func (d DoltBucket) BoundCount() uint64 {
|
||||
return d.Bucket.BoundCount()
|
||||
}
|
||||
|
||||
func (d DoltBucket) UpperBound() sql.Row {
|
||||
return d.Bucket.UpperBound()
|
||||
}
|
||||
|
||||
func (d DoltBucket) McvCounts() []uint64 {
|
||||
return d.Bucket.McvCounts()
|
||||
}
|
||||
|
||||
func (d DoltBucket) Mcvs() []sql.Row {
|
||||
return d.Bucket.Mcvs()
|
||||
}
|
||||
|
||||
func DoltBucketChunk(b sql.HistogramBucket) hash.Hash {
|
||||
return b.(DoltBucket).Chunk
|
||||
}
|
||||
|
||||
func DoltBucketCreated(b sql.HistogramBucket) time.Time {
|
||||
return b.(DoltBucket).Created
|
||||
}
|
||||
|
||||
var _ sql.HistogramBucket = (*DoltBucket)(nil)
|
||||
|
||||
func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) {
|
||||
ret := make(sql.Histogram, len(hist))
|
||||
var err error
|
||||
for i, b := range hist {
|
||||
upperBound := make(sql.Row, len(b.UpperBound()))
|
||||
for i, v := range b.UpperBound() {
|
||||
upperBound[i], _, err = types[i].Convert(v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
|
||||
}
|
||||
}
|
||||
mcvs := make([]sql.Row, len(b.Mcvs()))
|
||||
for i, mcv := range b.Mcvs() {
|
||||
for _, v := range mcv {
|
||||
conv, _, err := types[i].Convert(v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
|
||||
}
|
||||
mcvs[i] = append(mcvs[i], conv)
|
||||
}
|
||||
}
|
||||
ret[i] = DoltBucket{
|
||||
Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs).(*stats.Bucket),
|
||||
}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -15,10 +15,6 @@
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
@@ -26,67 +22,33 @@ import (
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
)
|
||||
|
||||
func NewStatsInitDatabaseHook(
|
||||
statsProv *Provider,
|
||||
ctxFactory func(ctx context.Context) (*sql.Context, error),
|
||||
bThreads *sql.BackgroundThreads,
|
||||
) sqle.InitDatabaseHook {
|
||||
func NewInitDatabaseHook(sc *StatsController) sqle.InitDatabaseHook {
|
||||
return func(
|
||||
ctx *sql.Context,
|
||||
pro *sqle.DoltDatabaseProvider,
|
||||
_ *sqle.DoltDatabaseProvider,
|
||||
name string,
|
||||
denv *env.DoltEnv,
|
||||
db dsess.SqlDatabase,
|
||||
) error {
|
||||
dbName := strings.ToLower(db.Name())
|
||||
if statsDb, ok := statsProv.getStatDb(dbName); !ok {
|
||||
statsDb, err := statsProv.sf.Init(ctx, db, statsProv.pro, denv.FS, env.GetCurrentUserHomeDir)
|
||||
if err != nil {
|
||||
ctx.GetLogger().Debugf("statistics load error: %s", err.Error())
|
||||
return nil
|
||||
}
|
||||
statsProv.setStatDb(dbName, statsDb)
|
||||
} else {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
for _, br := range statsDb.Branches() {
|
||||
branchQDbName := BranchQualifiedDatabase(dbName, br)
|
||||
sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
|
||||
if err != nil {
|
||||
ctx.GetLogger().Logger.Errorf("branch not found: %s", br)
|
||||
continue
|
||||
}
|
||||
branchQDb, ok := sqlDb.(dsess.SqlDatabase)
|
||||
if !ok {
|
||||
return fmt.Errorf("branch/database not found: %s", branchQDbName)
|
||||
}
|
||||
|
||||
if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil {
|
||||
return err
|
||||
} else if ok {
|
||||
if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName)
|
||||
if sc.hdpEnv == nil {
|
||||
sc.mu.Lock()
|
||||
sc.hdpEnv = denv
|
||||
sc.mu.Unlock()
|
||||
}
|
||||
ctx.GetLogger().Debugf("statistics refresh: initialize %s", name)
|
||||
return statsProv.InitAutoRefresh(ctxFactory, name, bThreads)
|
||||
sqlDb, ok := db.(sqle.Database)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// call should only fail if backpressure in secondary queue
|
||||
return sc.AddFs(ctx, sqlDb, denv.FS, true)
|
||||
}
|
||||
}
|
||||
|
||||
func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook {
|
||||
func NewDropDatabaseHook(sc *StatsController) sqle.DropDatabaseHook {
|
||||
return func(ctx *sql.Context, name string) {
|
||||
statsProv.CancelRefreshThread(name)
|
||||
if err := statsProv.DropDbStats(ctx, name, false); err != nil {
|
||||
if err := sc.DropDbStats(ctx, name, false); err != nil {
|
||||
ctx.GetLogger().Debugf("failed to close stats database: %s", err)
|
||||
}
|
||||
|
||||
if db, ok := statsProv.getStatDb(name); ok {
|
||||
if err := db.Close(); err != nil {
|
||||
ctx.GetLogger().Debugf("failed to close stats database: %s", err)
|
||||
}
|
||||
delete(statsProv.statDbs, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
|
||||
// Database is a backing store for a collection of DoltStats.
// Each stats database tracks a user database, with multiple
// branches potentially each having their own statistics.
type Database interface {
	// ListStatQuals returns the list of index statistics for a branch.
	ListStatQuals(branch string) []sql.StatQualifier
	// LoadBranchStats starts tracking a specific branch's statistics.
	LoadBranchStats(ctx *sql.Context, branch string) error
	// DeleteBranchStats removes references to in memory index statistics.
	// If |flush| is true delete the data from storage.
	DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error
	// GetStat returns a branch's index statistics.
	GetStat(branch string, qual sql.StatQualifier) (*DoltStats, bool)
	// SetStat bulk replaces the statistic, deleting any previous version.
	SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *DoltStats) error
	// DeleteStats deletes a list of index statistics.
	DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier)
	// ReplaceChunks is an update interface that lets a stats implementation
	// decide how to edit stats for a stats refresh.
	ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error
	// Flush instructs the database to sync any partial state to disk.
	Flush(ctx context.Context, branch string) error
	// Close finalizes any file references.
	Close() error
	// SetTableHash updates the most recently tracked table stats table hash.
	SetTableHash(branch, tableName string, h hash.Hash)
	// GetTableHash returns the most recently tracked table stats table hash.
	GetTableHash(branch, tableName string) hash.Hash
	// SetSchemaHash updates the most recently stored table stat's schema hash.
	SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error
	// GetSchemaHash returns the schema hash for the latest stored statistics.
	GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error)
	// Branches returns the set of branches with tracked statistics databases.
	Branches() []string
	// SchemaChange returns false if any table schema in the session
	// root is incompatible with the latest schema used to create a stored
	// set of statistics.
	SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error)
}
|
||||
|
||||
// StatsFactory instances construct statistic databases.
type StatsFactory interface {
	// Init gets a reference to the stats database for a dolt database
	// rooted at the given filesystem. It will create the database if
	// it does not exist.
	Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (Database, error)
}
|
||||
@@ -0,0 +1,410 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package jobqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/circular"
|
||||
)
|
||||
|
||||
// A SerialQueue is a job queue which runs one job at a time. Jobs are
// run in the order they are submitted, with the exception that every
// interrupt job is run before any normal priority job.
//
// A SerialQueue can be paused, in which case it will accept new
// submissions, but will not run them until it is started again.
//
// A SerialQueue can be purged, which deletes any pending jobs from
// it.
//
// A SerialQueue can be stopped, in which case it will not accept new
// submissions and no pending work will be run. Stopping a queue does
// not purge it, but it is easy for a caller to stop and purge the
// queue.
//
// A stopped or paused SerialQueue can be started, which will cause it
// to start running submitted jobs again, including any unpurged jobs
// which were pending when it was stopped or paused.
//
// A SerialQueue runs background threads to coordinate its
// behavior. These background threads are launched with a `Context`
// supplied to its |Run| method. If that `Context` ever becomes
// `Done`, the SerialQueue terminally enters a completed state.
//
// In general, jobs running on the queue should not block indefinitely
// and should be very careful about any synchronization. It is safe
// for jobs within the queue to call DoAsync, InterruptAsync, Stop,
// Pause, Purge and Start on the queue itself. It is a deadlock for a
// job within the queue to perform a DoSync or InterruptSync on the
// queue itself, although that deadlock may be resolved if the
// provided |ctx| ends up |Done|.
type SerialQueue struct {
	// Set once by |Run|; guards against running the queue twice.
	running atomic.Bool

	// If the queue is terminally completed, this will be closed.
	// Submissions to the queue scheduler select on this channel
	// to return errors if the scheduler is no longer accepting
	// work.
	completed chan struct{}

	// Unbuffered channel handing accepted work from the scheduler
	// goroutine to the runner goroutine.
	runnerCh chan work
	// Unbuffered channel carrying enqueue/control requests to the
	// scheduler goroutine.
	schedCh chan schedReq
	// Optional; invoked by the runner when a job returns an error
	// or panics. Set via |WithErrorCb| before |Run|.
	errCb func(error)
}
|
||||
|
||||
// |work| represents work to be run on the runner goroutine.
type work struct {
	// The function to call.
	f func() error
	// The channel to close after the work is run.
	done chan struct{}
	// If > 0, the runner resets its rate-limit ticker to this
	// interval before running this job.
	newRate time.Duration
}
|
||||
|
||||
// schedState is the scheduler's lifecycle state; it controls whether
// new work is accepted and whether queued work is dispatched.
type schedState int

const (
	// When scheduler is running, it is willing to accept new work
	// and to give work to the work thread.
	schedState_Running schedState = iota
	// When scheduler is paused, it is willing to accept new work
	// but it does not give work to the work thread.
	schedState_Paused
	// When scheduler is stopped, it does not accept new work
	// and it does not give work to the work thread.
	schedState_Stopped
)
|
||||
|
||||
// schedReqType enumerates the request kinds understood by the
// scheduler goroutine.
type schedReqType int

const (
	schedReqType_Enqueue schedReqType = iota
	schedReqType_Purge
	schedReqType_Start
	schedReqType_Pause
	schedReqType_Stop
)
|
||||
|
||||
// schedPriority selects which pending-work queue an Enqueue request
// lands in; High entries always run before Normal entries.
type schedPriority int

const (
	schedPriority_Normal schedPriority = iota
	schedPriority_High
)
|
||||
|
||||
// Incoming message for the scheduler thread.
type schedReq struct {
	reqType schedReqType
	// Always set, the scheduler's response is
	// sent through this channel. The send
	// must never block.
	resp chan schedResp
	// Set when |reqType| is Enqueue.
	pri schedPriority
	// Set when |reqType| is Enqueue.
	work work
}

// schedResp is the scheduler's reply to a schedReq; a nil |err| means
// the request was accepted.
type schedResp struct {
	err error
}
|
||||
|
||||
// ErrStoppedQueue is returned when work is submitted to a stopped
// queue. ErrCompletedQueue is returned once the queue's Run context
// is done and the queue is terminally completed. Compare with
// errors.Is. (Error strings follow Go convention: lowercase, no
// trailing punctuation.)
var ErrStoppedQueue = errors.New("stopped queue: cannot submit work to a stopped queue")
var ErrCompletedQueue = errors.New("completed queue: the queue is no longer running")
|
||||
|
||||
// Create a new serial queue. All of the methods on the returned
|
||||
// SerialQueue block indefinitely until its |Run| method is called.
|
||||
func NewSerialQueue() *SerialQueue {
|
||||
return &SerialQueue{
|
||||
completed: make(chan struct{}),
|
||||
runnerCh: make(chan work),
|
||||
schedCh: make(chan schedReq),
|
||||
}
|
||||
}
|
||||
// WithErrorCb registers a callback invoked when a job returns an error
// or panics, and returns the receiver for chaining. The assignment is
// unsynchronized, so call this before |Run| starts the worker threads.
func (s *SerialQueue) WithErrorCb(errCb func(error)) *SerialQueue {
	s.errCb = errCb
	return s
}
|
||||
|
||||
// Run the serial queue's background threads with this |ctx|. If the
// |ctx| ever becomes |Done|, the queue enters a terminal completed
// state. It is an error to call this function more than once.
func (s *SerialQueue) Run(ctx context.Context) {
	if !s.running.CompareAndSwap(false, true) {
		panic("Cannot run a SerialQueue more than once.")
	}
	// Closing |completed| unblocks every pending and future caller
	// with ErrCompletedQueue once both workers have exited.
	defer close(s.completed)
	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		defer wg.Done()
		s.runScheduler(ctx)
	}()
	go func() {
		defer wg.Done()
		s.runRunner(ctx)
	}()
	wg.Wait()
}
|
||||
|
||||
// Start the queue. The queue can be in any state, including already started.
|
||||
func (s *SerialQueue) Start() error {
|
||||
return s.makeReq(schedReq{
|
||||
reqType: schedReqType_Start,
|
||||
resp: make(chan schedResp, 1),
|
||||
})
|
||||
}
|
||||
|
||||
// Pause the queue. The queue can be in any state, including already
|
||||
// paused. Note that pausing the queue does not block on any
|
||||
// currently running job to complete. A pattern to pause the queue
|
||||
// with a guarantee that nothing is currently running is:
|
||||
//
|
||||
// s.InterruptSync(context.Background(), func() { s.Pause() })
|
||||
func (s *SerialQueue) Pause() error {
|
||||
return s.makeReq(schedReq{
|
||||
reqType: schedReqType_Pause,
|
||||
resp: make(chan schedResp, 1),
|
||||
})
|
||||
}
|
||||
|
||||
// Stop the queue. The queue can be in any state, including already
|
||||
// stopped. Note that stopping the queue does not block on any
|
||||
// currently running job to complete.
|
||||
func (s *SerialQueue) Stop() error {
|
||||
return s.makeReq(schedReq{
|
||||
reqType: schedReqType_Stop,
|
||||
resp: make(chan schedResp, 1),
|
||||
})
|
||||
}
|
||||
|
||||
// Purge the queue. All pending jobs will be dropped.
|
||||
func (s *SerialQueue) Purge() error {
|
||||
return s.makeReq(schedReq{
|
||||
reqType: schedReqType_Purge,
|
||||
resp: make(chan schedResp, 1),
|
||||
})
|
||||
}
|
||||
|
||||
// NewRateLimit installs a new minimum interval between jobs on the
// runner. The change is delivered as a high-priority no-op job, so it
// takes effect once the runner dequeues it; a stopped queue rejects
// it with ErrStoppedQueue.
func (s *SerialQueue) NewRateLimit(rate time.Duration) error {
	return s.makeReq(schedReq{
		reqType: schedReqType_Enqueue,
		pri:     schedPriority_High,
		work: work{
			f:       func() error { return nil },
			done:    make(chan struct{}),
			newRate: rate,
		},
		resp: make(chan schedResp, 1),
	})
}
|
||||
|
||||
// Run a high priority job on the SerialQueue, blocking for its completion.
// If done against a Paused queue, this could block indefinitely. The
// block for completion is gated on the |ctx|.
func (s *SerialQueue) InterruptSync(ctx context.Context, f func() error) error {
	w, err := s.submitWork(schedPriority_High, f)
	if err != nil {
		return err
	}
	select {
	case <-w.done:
		// The runner closed |done| after running |f|.
		return nil
	case <-ctx.Done():
		// Caller gave up waiting; the job may still run later.
		return context.Cause(ctx)
	case <-s.completed:
		// Queue terminated before the job ran.
		return ErrCompletedQueue
	}
}
|
||||
|
||||
// Run a normal priority job on the SerialQueue, blocking for its completion.
// When done against a paused queue, this can block indefinitely.
func (s *SerialQueue) DoSync(ctx context.Context, f func() error) error {
	w, err := s.submitWork(schedPriority_Normal, f)
	if err != nil {
		return err
	}
	select {
	case <-w.done:
		// The runner closed |done| after running |f|.
		return nil
	case <-ctx.Done():
		// Caller gave up waiting; the job may still run later.
		return context.Cause(ctx)
	case <-s.completed:
		// Queue terminated before the job ran.
		return ErrCompletedQueue
	}
}
|
||||
|
||||
// Run a high priority job asynchronously on the queue. Returns once the
|
||||
// job is accepted.
|
||||
func (s *SerialQueue) InterruptAsync(f func() error) error {
|
||||
_, err := s.submitWork(schedPriority_High, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run a normal priority job asynchronously on the queue. Returns once the
|
||||
// job is accepted.
|
||||
func (s *SerialQueue) DoAsync(f func() error) error {
|
||||
_, err := s.submitWork(schedPriority_Normal, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Helper function to submit work. Returns the work submitted, if it
// was successful, and an error otherwise. The returned work's |done|
// channel is closed by the runner once |f| has executed.
func (s *SerialQueue) submitWork(pri schedPriority, f func() error) (work, error) {
	w := work{
		f:    f,
		done: make(chan struct{}),
	}
	err := s.makeReq(schedReq{
		reqType: schedReqType_Enqueue,
		pri:     pri,
		work:    w,
		resp:    make(chan schedResp, 1),
	})
	if err != nil {
		return work{}, err
	}
	return w, nil
}
|
||||
|
||||
// makeReq delivers |req| to the scheduler goroutine and waits for its
// reply. Returns ErrCompletedQueue if the queue terminates before the
// request is accepted. |req.resp| must be buffered so the scheduler's
// reply never blocks.
func (s *SerialQueue) makeReq(req schedReq) error {
	select {
	case s.schedCh <- req:
		resp := <-req.resp
		return resp.err
	case <-s.completed:
		return ErrCompletedQueue
	}
}
|
||||
|
||||
// Read off the input channels and maintain queues of pending work.
// Deliver that work to the runner channel if it is desired.
func (s *SerialQueue) runScheduler(ctx context.Context) {
	state := schedState_Running
	normalQ := circular.NewBuff[work](16)
	highQ := circular.NewBuff[work](16)
	for {
		// When not running, or when both queues are empty,
		// |sendWorkCh| stays nil so the send case below can never
		// fire; otherwise it points at the runner channel with the
		// next job (high priority first).
		var sendWorkCh chan work
		var sendWork work
		var sentWorkCallback func()

		if state == schedState_Running {
			if highQ.Len() > 0 {
				sendWorkCh = s.runnerCh
				sendWork = highQ.Front()
				sentWorkCallback = highQ.Pop
			} else if normalQ.Len() > 0 {
				sendWorkCh = s.runnerCh
				sendWork = normalQ.Front()
				sentWorkCallback = normalQ.Pop
			}
		}

		select {
		case msg := <-s.schedCh:
			switch msg.reqType {
			case schedReqType_Enqueue:
				if state == schedState_Stopped {
					msg.resp <- schedResp{
						err: ErrStoppedQueue,
					}
				} else {
					if msg.pri == schedPriority_High {
						highQ.Push(msg.work)
					} else {
						normalQ.Push(msg.work)
					}
					msg.resp <- schedResp{
						err: nil,
					}
				}
			case schedReqType_Purge:
				// Drop all pending work; keep current capacities.
				highQ = circular.NewBuff[work](highQ.Cap())
				normalQ = circular.NewBuff[work](normalQ.Cap())
				msg.resp <- schedResp{
					err: nil,
				}
			case schedReqType_Start:
				state = schedState_Running
				msg.resp <- schedResp{
					err: nil,
				}
			case schedReqType_Pause:
				state = schedState_Paused
				msg.resp <- schedResp{
					err: nil,
				}
			case schedReqType_Stop:
				state = schedState_Stopped
				msg.resp <- schedResp{
					err: nil,
				}
			}
		case sendWorkCh <- sendWork:
			// Pop from queue the work came from.
			sentWorkCallback()
		case <-ctx.Done():
			return
		}
	}
}
|
||||
|
||||
// Read off the runner channel and run the submitted work.
|
||||
func (s *SerialQueue) runRunner(ctx context.Context) {
|
||||
ticker := time.NewTicker(1)
|
||||
for {
|
||||
select {
|
||||
case w := <-s.runnerCh:
|
||||
if w.newRate > 0 {
|
||||
ticker.Reset(w.newRate)
|
||||
}
|
||||
|
||||
// do not run jobs more frequently than the ticker rate
|
||||
select {
|
||||
case <-ticker.C:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
|
||||
func() {
|
||||
var err error
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = fmt.Errorf("serialQueue panicked running work: %s", r)
|
||||
}
|
||||
if err != nil {
|
||||
s.errCb(err)
|
||||
}
|
||||
}()
|
||||
err = w.f()
|
||||
}()
|
||||
close(w.done)
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,361 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package jobqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestSerialQueue exercises the SerialQueue lifecycle: completion on a
// canceled Run context, stop/pause/start semantics, interrupt (high
// priority) ordering, purge, re-entrant control calls from inside
// jobs, and rate limiting.
func TestSerialQueue(t *testing.T) {
	if runtime.GOOS == "windows" && os.Getenv("CI") != "" {
		t.Skip("Racy on Windows CI")
	}
	t.Run("CanceledRunContext", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		cancel()
		queue := NewSerialQueue()
		// This should return.
		queue.Run(ctx)
		// Now all methods should return ErrCompletedQueue.
		assert.ErrorIs(t, queue.Start(), ErrCompletedQueue)
		assert.ErrorIs(t, queue.Pause(), ErrCompletedQueue)
		assert.ErrorIs(t, queue.Stop(), ErrCompletedQueue)
		assert.ErrorIs(t, queue.DoSync(context.Background(), func() error { return nil }), ErrCompletedQueue)
		assert.ErrorIs(t, queue.DoAsync(func() error { return nil }), ErrCompletedQueue)
		assert.ErrorIs(t, queue.InterruptSync(context.Background(), func() error { return nil }), ErrCompletedQueue)
		assert.ErrorIs(t, queue.InterruptAsync(func() error { return nil }), ErrCompletedQueue)
	})
	t.Run("StartsRunning", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		var ran bool
		err := queue.DoSync(context.Background(), func() error {
			ran = true
			return nil
		})
		assert.NoError(t, err)
		assert.True(t, ran, "the sync task ran.")
		cancel()
		wg.Wait()
	})
	t.Run("StoppedQueueReturnsError", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		assert.NoError(t, queue.Stop())
		err := queue.DoSync(context.Background(), func() error { return nil })
		assert.ErrorIs(t, err, ErrStoppedQueue)
		cancel()
		wg.Wait()
	})
	t.Run("PausedQueueDoesNotRun", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		assert.NoError(t, queue.Pause())
		var ran bool
		for i := 0; i < 16; i++ {
			err := queue.DoAsync(func() error {
				ran = true
				return nil
			})
			assert.NoError(t, err)
		}
		cancel()
		wg.Wait()
		assert.False(t, ran, "work did not run on the paused queue.")
	})
	t.Run("StartingPausedQueueRunsIt", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		assert.NoError(t, queue.Pause())
		var ran bool
		for i := 0; i < 16; i++ {
			err := queue.DoAsync(func() error {
				ran = true
				return nil
			})
			assert.NoError(t, err)
		}
		assert.NoError(t, queue.Start())
		// The trailing DoSync acts as a barrier: all prior async
		// jobs have run once it returns.
		err := queue.DoSync(context.Background(), func() error { return nil })
		assert.NoError(t, err)
		assert.True(t, ran, "work ran after the paused queue was started.")
		cancel()
		wg.Wait()
	})
	t.Run("InterruptWorkRunsFirst", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		// Pause so all four jobs queue up; the interrupts must then
		// run before the normal jobs when the queue restarts.
		assert.NoError(t, queue.Pause())
		var cnt int
		queue.DoAsync(func() error {
			assert.Equal(t, cnt, 2)
			cnt += 1
			return nil
		})
		queue.DoAsync(func() error {
			assert.Equal(t, cnt, 3)
			cnt += 1
			return nil
		})
		queue.InterruptAsync(func() error {
			assert.Equal(t, cnt, 0)
			cnt += 1
			return nil
		})
		queue.InterruptAsync(func() error {
			assert.Equal(t, cnt, 1)
			cnt += 1
			return nil
		})
		assert.NoError(t, queue.Start())
		assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil }))
		assert.Equal(t, cnt, 4)
		cancel()
		wg.Wait()
	})
	t.Run("StopFromQueue", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		// block until queue is running
		assert.NoError(t, queue.DoSync(ctx, func() error {
			return nil
		}))
		var cnt int
		for i := 0; i < 16; i++ {
			// Some of these calls may error, since the queue
			// will be stopped asynchronously.
			queue.DoAsync(func() error {
				cnt += 1
				assert.NoError(t, queue.Stop())
				return nil
			})
		}
		// NOTE(review): |cnt| is written by the runner goroutine and
		// read here without synchronization — likely the race behind
		// the Windows CI skip above; confirm under -race.
		assert.Equal(t, cnt, 1)
		cancel()
		wg.Wait()
	})
	t.Run("PauseFromQueue", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()
		// block until queue is running
		assert.NoError(t, queue.DoSync(ctx, func() error {
			return nil
		}))

		done := make(chan struct{})
		for i := 0; i < 16; i++ {
			err := queue.DoAsync(func() error {
				// Only the first job runs; it pauses the queue, so
				// |done| is closed exactly once.
				close(done)
				assert.NoError(t, queue.Pause())
				return nil
			})
			assert.NoError(t, err)
		}
		<-done
		cancel()
		wg.Wait()
	})
	t.Run("PurgeFromQueue", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		var wg sync.WaitGroup
		wg.Add(1)

		go func() error {
			defer wg.Done()
			queue.Run(ctx)
			return nil
		}()

		assert.NoError(t, queue.Pause())
		var cnt int
		didRun := make(chan struct{})
		for i := 0; i < 16; i++ {
			err := queue.DoAsync(func() error {
				cnt += 1
				assert.NoError(t, queue.Purge())
				close(didRun)
				return nil
			})
			assert.NoError(t, err)
		}
		assert.NoError(t, queue.Start())
		<-didRun
		assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil }))
		assert.Equal(t, cnt, 1)
		cancel()
		wg.Wait()
	})
	t.Run("DoSyncInQueueDeadlockWithContext", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		start := make(chan struct{})

		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			close(start)
			queue.Run(ctx)
			return nil
		}()
		<-start
		var cnt int
		err := queue.DoSync(context.Background(), func() error {
			cnt += 1
			// A DoSync from inside a job deadlocks until its own
			// context times out; the inner job still runs later.
			ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
			defer cancel()
			err := queue.DoSync(ctx, func() error {
				cnt += 1
				return nil
			})
			assert.ErrorIs(t, err, context.DeadlineExceeded)
			return nil
		})
		assert.NoError(t, err)
		assert.NoError(t, queue.DoSync(context.Background(), func() error { return nil }))
		// Both tasks eventually ran...
		assert.Equal(t, cnt, 2)
		cancel()
		wg.Wait()
	})
	t.Run("SyncReturnsErrCompletedQueueAfterWorkAccepted", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		queue := NewSerialQueue()
		start := make(chan struct{})
		var wg sync.WaitGroup
		wg.Add(1)
		go func() error {
			defer wg.Done()
			close(start)
			queue.Run(ctx)
			return nil
		}()
		<-start
		queue.Pause()
		var err error
		var ran bool
		wg.Add(1)
		go func() error {
			defer wg.Done()
			err = queue.InterruptSync(context.Background(), func() error {
				ran = true
				return nil
			})
			return nil
		}()
		wg.Add(1)
		go func() error {
			defer wg.Done()
			time.Sleep(100 * time.Millisecond)
			queue.Stop()
			return nil
		}()
		cancel()
		wg.Wait()
		assert.ErrorIs(t, err, ErrCompletedQueue)
		assert.False(t, ran, "the interrupt task never ran.")
	})
	t.Run("RateLimitWorkThroughput", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()
		queue := NewSerialQueue()
		running := make(chan struct{})
		go func() {
			close(running)
			queue.Run(ctx)
		}()
		<-running

		// first will run because timeout > job rate
		ran := false
		subCtx, cancel2 := context.WithTimeout(ctx, 5*time.Millisecond)
		defer cancel2()
		err := queue.DoSync(subCtx, func() error {
			ran = true
			return nil
		})
		assert.NoError(t, err)
		assert.True(t, ran, "the interrupt task never ran.")

		// second timeout < jobrate, will fail
		queue.NewRateLimit(10 * time.Millisecond)
		ran = false
		subCtx, cancel3 := context.WithTimeout(ctx, 5*time.Millisecond)
		defer cancel3()
		err = queue.DoSync(subCtx, func() error {
			ran = true
			return nil
		})
		assert.ErrorIs(t, err, context.DeadlineExceeded)
		assert.False(t, ran, "the interrupt task never ran.")
	})
}
|
||||
@@ -0,0 +1,259 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
)
|
||||
|
||||
// ErrStatsIssuerPaused is returned when a listener is registered while the
// stats issuer is paused or stopped (no active worker context).
var ErrStatsIssuerPaused = fmt.Errorf("stats issuer is paused")
|
||||
|
||||
// listenerEvent is a bitfield of worker lifecycle events that listeners can
// subscribe to.
type listenerEvent uint16

const (
	// leUnknown matches no event by itself; a listener registered with it
	// still fires on stop, because signalListener treats leStop as matching
	// every listener.
	leUnknown = listenerEvent(iota)
	// leSwap signals completion of a worker update cycle (see WaitForSync).
	leSwap listenerEvent = 1 << 0
	// leStop signals the worker being stopped or its context replaced.
	leStop listenerEvent = 1 << 1
	// leGc signals completion of a stats garbage collection cycle (see Gc).
	leGc listenerEvent = 1 << 2
	// leFlush signals stats being flushed to disk (see WaitForFlush).
	leFlush listenerEvent = 1 << 3
)
|
||||
|
||||
func (sc *StatsController) signalListener(s listenerEvent) {
|
||||
keep := 0
|
||||
for i, l := range sc.listeners {
|
||||
if (l.target|leStop)&s > 0 {
|
||||
l.c <- s
|
||||
close(l.c)
|
||||
} else {
|
||||
sc.listeners[keep] = sc.listeners[i]
|
||||
keep++
|
||||
}
|
||||
}
|
||||
sc.listeners = sc.listeners[:keep]
|
||||
}
|
||||
|
||||
func (sc *StatsController) newThreadCtx(ctx context.Context) context.Context {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
|
||||
newCtx, cancel := context.WithCancel(ctx)
|
||||
if sc.activeCtxCancel != nil {
|
||||
sc.activeCtxCancel()
|
||||
}
|
||||
sc.signalListener(leStop)
|
||||
sc.activeCtxCancel = cancel
|
||||
return newCtx
|
||||
}
|
||||
|
||||
// listener is a one-shot subscription for worker lifecycle events.
type listener struct {
	// target is the bitmask of events this listener waits for.
	target listenerEvent
	// c is buffered (size 1); it receives a single matching event and is
	// then closed by signalListener.
	c chan listenerEvent
}
|
||||
|
||||
func (sc *StatsController) addListener(e listenerEvent) (chan listenerEvent, error) {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.activeCtxCancel == nil {
|
||||
return nil, ErrStatsIssuerPaused
|
||||
}
|
||||
l := listener{target: e, c: make(chan listenerEvent, 1)}
|
||||
sc.listeners = append(sc.listeners, l)
|
||||
return l.c, nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) Stop() {
|
||||
// xxx: do not pause |sq|, analyze jobs still need to run
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.activeCtxCancel != nil {
|
||||
sc.activeCtxCancel()
|
||||
sc.activeCtxCancel = nil
|
||||
}
|
||||
sc.signalListener(leStop)
|
||||
return
|
||||
}
|
||||
|
||||
// RefreshFromSysVars reads the environment variables and updates controller
// parameters. If the queue is not started this will hang.
func (sc *StatsController) RefreshFromSysVars() {
	// System variable lookup errors are intentionally ignored; these vars
	// are expected to be registered with defaults.
	_, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly)
	sc.SetMemOnly(memOnly.(int8) == 1)

	_, gcEnabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCEnabled)
	sc.SetEnableGc(gcEnabled.(int8) == 1)

	typ, jobI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsJobInterval)
	_, gcI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCInterval)

	// NOTE(review): |typ| comes from the job-interval variable but is reused
	// to convert the GC interval as well — presumably both variables share
	// the same SQL type; confirm that assumption holds. Conversion errors
	// are dropped, so a bad value would panic on the int64 assertion below.
	jobInterval, _, _ := typ.GetType().Convert(jobI)
	gcInterval, _, _ := typ.GetType().Convert(gcI)

	// Values are interpreted as milliseconds and handed to SetTimers as
	// nanosecond counts.
	sc.SetTimers(
		jobInterval.(int64)*int64(time.Millisecond),
		gcInterval.(int64)*int64(time.Millisecond),
	)
}
|
||||
|
||||
// Restart starts (or restarts) the stats worker. It refuses to run once the
// controller has been closed. The serial queue is (re)started, sysvar-derived
// settings are refreshed, and a new background worker is launched under a
// fresh context that supersedes any previous worker's context. Restart only
// returns after the new context is installed, so a subsequent Stop is
// guaranteed to target this worker.
func (sc *StatsController) Restart() error {
	// A closed controller can never be restarted.
	select {
	case <-sc.closed:
		return fmt.Errorf("StatsController is closed")
	default:
	}

	sc.sq.Start()
	sc.RefreshFromSysVars()

	done := make(chan struct{})
	if err := sc.bgThreads.Add("stats_worker", func(ctx context.Context) {
		// newThreadCtx cancels any previously active worker context and
		// installs this one as active.
		ctx = sc.newThreadCtx(ctx)
		close(done)
		err := sc.runWorker(ctx)
		if err != nil {
			sc.logger.Errorf("stats stopped: %s", err.Error())
		}
	}); err != nil {
		return err
	}
	// only return after latestCtx updated
	<-done
	return nil
}
|
||||
|
||||
func (sc *StatsController) RunQueue() {
|
||||
if err := sc.bgThreads.Add("stats_scheduler", sc.sq.Run); err != nil {
|
||||
sc.descError("start scheduler", err)
|
||||
}
|
||||
// block on queue starting
|
||||
sc.sq.DoSync(context.Background(), func() error { return nil })
|
||||
return
|
||||
}
|
||||
|
||||
// Init should only be called once. It wires the controller to the provider,
// context factory, and background thread pool, starts the serial queue,
// registers every non-read-replica database, and — for the first database
// only — either reloads previously persisted stats from disk or rotates to a
// fresh stats directory.
func (sc *StatsController) Init(ctx context.Context, pro *sqle.DoltDatabaseProvider, ctxGen ctxFactory, bthreads *sql.BackgroundThreads, dbs []sql.Database) error {
	sc.pro = pro
	sc.ctxGen = ctxGen
	sc.bgThreads = bthreads

	sc.RunQueue()
	sqlCtx, err := sc.ctxGen(ctx)
	if err != nil {
		return err
	}
	defer sql.SessionEnd(sqlCtx.Session)
	sql.SessionCommandBegin(sqlCtx.Session)
	defer sql.SessionCommandEnd(sqlCtx.Session)

	for i, db := range dbs {
		if db, ok := db.(sqle.Database); ok { // exclude read replica dbs
			fs, err := sc.pro.FileSystemForDatabase(db.AliasedName())
			if err != nil {
				return err
			}
			if err := sc.AddFs(sqlCtx, db, fs, false); err != nil {
				return err
			}
			// Only the first database's filesystem backs the on-disk stats
			// store; memory-only mode skips disk entirely.
			if i > 0 || sc.memOnly {
				continue
			}
			// attempt to access previously written stats
			statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
			if err != nil {
				return err
			}

			exists, isDir := statsFs.Exists("")
			if exists && isDir {
				newKv, err := sc.initStorage(ctx, fs)
				if err == nil {
					// Reuse the persisted store as this controller's backing
					// kv and remember which db's filesystem hosts it.
					sc.kv = newKv
					sc.statsBackingDb = fs
					continue
				} else {
					// Reboot failure is non-fatal: record it and fall through
					// to rotating a fresh stats directory below.
					path, _ := statsFs.Abs("")
					sc.descError("failed to reboot stats from: "+path, err)
				}
			}

			// otherwise wipe and create new stats dir
			if err := sc.lockedRotateStorage(ctx); err != nil {
				return err
			}
		}
	}
	return nil
}
|
||||
|
||||
func (sc *StatsController) waitForSignal(ctx context.Context, signal listenerEvent, cnt int) (err error) {
|
||||
for cnt > 0 {
|
||||
var l chan listenerEvent
|
||||
l, err = sc.addListener(signal)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return context.Cause(ctx)
|
||||
case <-l:
|
||||
cnt--
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WaitForSync blocks until the worker signals two completed swap cycles, or
// until ctx ends / the issuer is paused.
func (sc *StatsController) WaitForSync(ctx context.Context) (err error) {
	// wait for 2 cycles because first completion is usually a stale context
	return sc.waitForSignal(ctx, leSwap, 2)
}
|
||||
|
||||
func (sc *StatsController) WaitForFlush(ctx *sql.Context) error {
|
||||
sc.mu.Lock()
|
||||
memOnly := sc.memOnly
|
||||
sc.mu.Unlock()
|
||||
if memOnly {
|
||||
return fmt.Errorf("memory only statistics will not flush")
|
||||
}
|
||||
return sc.waitForSignal(ctx, leFlush, 1)
|
||||
}
|
||||
|
||||
// Gc requests a stats garbage collection and blocks until the worker signals
// one has completed (or ctx ends / the issuer pauses).
func (sc *StatsController) Gc(ctx *sql.Context) error {
	// Flag the worker to run GC on its next cycle.
	sc.setDoGc(true)
	return sc.waitForSignal(ctx, leGc, 1)
}
|
||||
|
||||
func (sc *StatsController) Close() {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.activeCtxCancel != nil {
|
||||
sc.activeCtxCancel()
|
||||
sc.activeCtxCancel = nil
|
||||
sc.sq.InterruptAsync(func() error {
|
||||
return sc.sq.Stop()
|
||||
})
|
||||
}
|
||||
sc.signalListener(leStop)
|
||||
|
||||
close(sc.closed)
|
||||
return
|
||||
}
|
||||
@@ -0,0 +1,250 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
func TestListening(t *testing.T) {
|
||||
bthreads := sql.NewBackgroundThreads()
|
||||
defer bthreads.Shutdown()
|
||||
t.Run("ClosedDoesNotStart", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
sc.Close()
|
||||
require.Error(t, sc.Restart())
|
||||
require.Nil(t, sc.activeCtxCancel)
|
||||
})
|
||||
t.Run("IsStoppable", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
eg := errgroup.Group{}
|
||||
ctx := sc.newThreadCtx(context.Background())
|
||||
eg.Go(func() error {
|
||||
return sc.runWorker(ctx)
|
||||
})
|
||||
|
||||
require.NotNil(t, sc.activeCtxCancel)
|
||||
|
||||
l, err := sc.addListener(leSwap)
|
||||
require.NoError(t, err)
|
||||
<-l
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
t.Fatal("expected latest thread ctx to be active")
|
||||
default:
|
||||
}
|
||||
sc.Stop()
|
||||
<-ctx.Done()
|
||||
require.ErrorIs(t, eg.Wait(), context.Canceled)
|
||||
})
|
||||
t.Run("StopsAreIdempotent", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
eg := errgroup.Group{}
|
||||
ctx := sc.newThreadCtx(context.Background())
|
||||
eg.Go(func() error {
|
||||
return sc.runWorker(ctx)
|
||||
})
|
||||
|
||||
sc.Stop()
|
||||
sc.Stop()
|
||||
sc.Stop()
|
||||
sc.Stop()
|
||||
<-ctx.Done()
|
||||
require.ErrorIs(t, eg.Wait(), context.Canceled)
|
||||
})
|
||||
t.Run("IsRestartable", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
eg := errgroup.Group{}
|
||||
ctx1 := sc.newThreadCtx(context.Background())
|
||||
eg.Go(func() error {
|
||||
return sc.runWorker(ctx1)
|
||||
})
|
||||
|
||||
ctx2 := sc.newThreadCtx(context.Background())
|
||||
eg.Go(func() error {
|
||||
return sc.runWorker(ctx2)
|
||||
})
|
||||
|
||||
ctx3 := sc.newThreadCtx(context.Background())
|
||||
eg.Go(func() error {
|
||||
return sc.runWorker(ctx3)
|
||||
})
|
||||
|
||||
<-ctx1.Done()
|
||||
<-ctx2.Done()
|
||||
sc.Stop()
|
||||
<-ctx3.Done()
|
||||
require.ErrorIs(t, eg.Wait(), context.Canceled)
|
||||
})
|
||||
t.Run("ConcurrentStartStopsAreOk", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for range 20 {
|
||||
require.NoError(t, sc.Restart())
|
||||
l, err := sc.addListener(leSwap)
|
||||
if err != nil {
|
||||
require.ErrorIs(t, err, ErrStatsIssuerPaused)
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case <-l:
|
||||
}
|
||||
}
|
||||
}()
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for range 20 {
|
||||
sc.Stop()
|
||||
l, err := sc.addListener(leSwap)
|
||||
if err != nil {
|
||||
require.ErrorIs(t, err, ErrStatsIssuerPaused)
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case <-l:
|
||||
case <-time.Tick(10 * time.Millisecond):
|
||||
print()
|
||||
}
|
||||
}
|
||||
}()
|
||||
wg.Wait()
|
||||
})
|
||||
t.Run("ListenForSwap", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
require.NoError(t, sc.Restart())
|
||||
l, err := sc.addListener(leSwap)
|
||||
require.NoError(t, err)
|
||||
select {
|
||||
case e := <-l:
|
||||
require.True(t, (leSwap&e) > 0, "expected success or gc signal")
|
||||
}
|
||||
})
|
||||
t.Run("ListenForStop", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
require.NoError(t, sc.Restart())
|
||||
var l chan listenerEvent
|
||||
err := sc.sq.DoSync(context.Background(), func() error {
|
||||
// do this in serial queue to make sure we don't race
|
||||
// with swap
|
||||
var err error
|
||||
require.NoError(t, err)
|
||||
l, err = sc.addListener(leUnknown)
|
||||
require.NoError(t, err)
|
||||
sc.Stop()
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
select {
|
||||
case e := <-l:
|
||||
require.Equal(t, e, leStop)
|
||||
default:
|
||||
t.Fatal("expected listener to recv stop")
|
||||
}
|
||||
})
|
||||
t.Run("ListenerFailsIfStopped", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
require.NoError(t, sc.Restart())
|
||||
sc.Stop()
|
||||
_, err := sc.addListener(leUnknown)
|
||||
require.ErrorIs(t, err, ErrStatsIssuerPaused)
|
||||
})
|
||||
t.Run("ListenerFailsIfClosed", func(t *testing.T) {
|
||||
sc := newStatsCoord(bthreads)
|
||||
sc.Close()
|
||||
require.Error(t, sc.Restart())
|
||||
_, err := sc.addListener(leUnknown)
|
||||
require.ErrorIs(t, err, ErrStatsIssuerPaused)
|
||||
})
|
||||
t.Run("WaitBlocksOnStatsCollection", func(t *testing.T) {
|
||||
sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true)
|
||||
require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)"))
|
||||
require.NoError(t, sc.Restart())
|
||||
done := make(chan struct{})
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
err := sc.sq.DoAsync(func() error {
|
||||
defer wg.Done()
|
||||
<-done
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer close(done)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||
defer cancel()
|
||||
err := sc.waitForSignal(ctx, leSwap, 1)
|
||||
require.ErrorIs(t, err, context.DeadlineExceeded)
|
||||
}()
|
||||
wg.Wait()
|
||||
})
|
||||
t.Run("WaitReturnsIfStoppedBefore", func(t *testing.T) {
|
||||
sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true)
|
||||
require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)"))
|
||||
require.NoError(t, sc.Restart())
|
||||
done := make(chan struct{})
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
err := sc.sq.DoAsync(func() error {
|
||||
defer wg.Done()
|
||||
<-done
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer close(done)
|
||||
sc.Stop()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||
defer cancel()
|
||||
err := sc.waitForSignal(ctx, leSwap, 1)
|
||||
require.ErrorIs(t, err, ErrStatsIssuerPaused)
|
||||
}()
|
||||
wg.Wait()
|
||||
})
|
||||
t.Run("WaitHangsUntilCycleCompletes", func(t *testing.T) {
|
||||
sqlCtx, sqlEng, sc := emptySetup(t, bthreads, true, true)
|
||||
require.NoError(t, executeQuery(sqlCtx, sqlEng, "create table xy (x int primary key, y int)"))
|
||||
require.NoError(t, sc.Restart())
|
||||
done := make(chan struct{})
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
err := sc.sq.DoAsync(func() error {
|
||||
defer wg.Done()
|
||||
<-done
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||
defer cancel()
|
||||
err := sc.waitForSignal(ctx, leSwap, 1)
|
||||
require.NoError(t, err)
|
||||
}()
|
||||
close(done)
|
||||
wg.Wait()
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
)
|
||||
|
||||
type StatsNoop struct{}
|
||||
|
||||
func (s StatsNoop) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) AnalyzeTable(ctx *sql.Context, table sql.Table, db string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) SetStats(ctx *sql.Context, stats sql.Statistic) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (s StatsNoop) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) DropDbStats(ctx *sql.Context, db string, flush bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) CancelRefreshThread(string) {
|
||||
return
|
||||
}
|
||||
|
||||
func (s StatsNoop) StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) ThreadStatus(string) string {
|
||||
return "stats disabled"
|
||||
}
|
||||
|
||||
func (s StatsNoop) Prune(ctx *sql.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) Purge(ctx *sql.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) WaitForSync(ctx *sql.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s StatsNoop) CollectOnce(ctx *sql.Context) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
var _ sql.StatsProvider = StatsNoop{}
|
||||
@@ -0,0 +1,731 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
|
||||
)
|
||||
|
||||
// scriptTest describes a stats integration script: setup queries run first,
// then each assertion executes in order against the same engine.
type scriptTest struct {
	name       string
	setup      []string
	assertions []assertion
}

// assertion is a single query paired with either its expected result rows or
// an expected error substring.
type assertion struct {
	query string
	res   []sql.Row
	err   string
}
|
||||
|
||||
func TestStatScripts(t *testing.T) {
|
||||
threads := sql.NewBackgroundThreads()
|
||||
defer threads.Shutdown()
|
||||
|
||||
scripts := []scriptTest{
|
||||
{
|
||||
name: "track updates",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y varchar(16), key (y,x))",
|
||||
"insert into xy values (0,'zero'), (1, 'one')",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
|
||||
},
|
||||
{
|
||||
query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(9)}},
|
||||
},
|
||||
{
|
||||
query: "update xy set y = 2 where x between 100 and 800",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(9)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "track deletes",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y varchar(16), key (y,x))",
|
||||
"insert into xy values (0,'zero'), (1, 'one')",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
|
||||
},
|
||||
{
|
||||
query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(9)}},
|
||||
},
|
||||
{
|
||||
query: "delete from xy where x > 600",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(5)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ddl table",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y varchar(16), key (y,x))",
|
||||
"insert into xy values (0,'0'), (1,'0'), (2,'0')",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
{
|
||||
query: "truncate table xy",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(0)}},
|
||||
},
|
||||
{
|
||||
query: "insert into xy values (0,'0'), (1,'0'), (2,'0')",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
{
|
||||
query: "drop table xy",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(0)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ddl index",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y varchar(16), key (y,x))",
|
||||
"insert into xy values (0,'0'), (1,'0'), (2,'0')",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
{
|
||||
query: "alter table xy drop index y",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(1)}},
|
||||
},
|
||||
{
|
||||
query: "alter table xy add index yx (y,x)",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
{
|
||||
query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
|
||||
res: []sql.Row{{"varchar(16),int", "0,2"}},
|
||||
},
|
||||
{
|
||||
query: "alter table xy modify column y int",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
|
||||
res: []sql.Row{{"int,int", "0,2"}},
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mcv counts",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y int, key (y,x))",
|
||||
"alter table xy add index y2 (y)",
|
||||
"alter table xy add index x2 (x,y)",
|
||||
"insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select mcv1, mcv2, mcv_counts from dolt_statistics where index_name = 'y2'",
|
||||
res: []sql.Row{{"1", "0", "4,6"}},
|
||||
},
|
||||
{
|
||||
query: "select mcv_counts from dolt_statistics where index_name = 'y'",
|
||||
res: []sql.Row{{""}},
|
||||
},
|
||||
{
|
||||
query: "select mcv_counts from dolt_statistics where index_name = 'x2'",
|
||||
res: []sql.Row{{""}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "vector index",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y json, vector key(y))",
|
||||
"insert into xy values (0, '0'), (1, '1'), (2, '2'), (3, NULL), (4, NULL)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "xy", "primary"}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 1,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 1,
|
||||
CachedBucketCnt: 1,
|
||||
CachedBoundCnt: 1,
|
||||
CachedTemplateCnt: 1,
|
||||
StatCnt: 1,
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "generated index",
|
||||
setup: []string{
|
||||
"create table t (pk int primary key, c0 int, c1 int as (c0) virtual, index idx(c1))",
|
||||
"insert into t (pk, c0) values (0,0), (1,1), (2,2), (3,NULL), (4,NULL)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "t", "idx"}, {"mydb", "t", "primary"}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 1,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 1,
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "keyless index",
|
||||
setup: []string{
|
||||
"create table t (c1 int, c2 int, index (c2))",
|
||||
"insert into t values (0,0), (1,1), (2,2), (3,NULL), (4,NULL)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "t", "c2"}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 1,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 1,
|
||||
CachedBucketCnt: 1,
|
||||
CachedBoundCnt: 1,
|
||||
CachedTemplateCnt: 1,
|
||||
StatCnt: 1,
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "caps testing",
|
||||
setup: []string{
|
||||
"create table XY (x int primary key, Y int, key Yx (Y,x))",
|
||||
"alter table xy add index y2 (y)",
|
||||
"insert into xy values (0,0), (1,0), (2,0)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y2"}, {"mydb", "xy", "yx"}},
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(3)}},
|
||||
},
|
||||
{
|
||||
query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(12)}},
|
||||
},
|
||||
{
|
||||
query: "delete from xy where x > 500",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(6)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "database ddl",
|
||||
setup: []string{
|
||||
"create table mydb.xy (x int primary key, y int, key (y,x))",
|
||||
"insert into xy values (0,0), (1,0), (2,0)",
|
||||
"create database repo2",
|
||||
"create table repo2.xy (x int primary key, y int, key (y,x))",
|
||||
"insert into repo2.xy values (0,0), (1,0), (2,0)",
|
||||
"create table repo2.ab (a int primary key, b int, key (b,a))",
|
||||
"insert into repo2.ab values (0,0), (1,0), (2,0)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{
|
||||
{"mydb", "xy", "primary"}, {"mydb", "xy", "y"},
|
||||
},
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
{
|
||||
query: "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name",
|
||||
res: []sql.Row{
|
||||
{"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
|
||||
{"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
|
||||
},
|
||||
},
|
||||
{
|
||||
query: "use repo2",
|
||||
},
|
||||
{
|
||||
query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
|
||||
res: []sql.Row{
|
||||
{"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
|
||||
{"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
|
||||
},
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(4)}},
|
||||
},
|
||||
{
|
||||
query: "insert into repo2.xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(10)}},
|
||||
},
|
||||
{
|
||||
query: "drop database repo2",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "use mydb",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "recreate table without index",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y int, key (y,x))",
|
||||
"insert into xy values (0,0), (1,0), (2,0)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(2)}},
|
||||
},
|
||||
{
|
||||
query: "drop table xy",
|
||||
},
|
||||
{
|
||||
query: "create table xy (x int primary key, y int)",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "select count(*) from dolt_statistics",
|
||||
res: []sql.Row{{int64(0)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "stats info",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y int, key (y,x))",
|
||||
"insert into xy values (0,0), (1,0), (2,0)",
|
||||
"call dolt_add('-A')",
|
||||
"call dolt_commit('-m', 'create xy')",
|
||||
"call dolt_checkout('-b', 'feat')",
|
||||
"call dolt_checkout('main')",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 2,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 2,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
query: "call dolt_checkout('feat')",
|
||||
},
|
||||
{
|
||||
query: "drop table xy",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 2,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 1,
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_checkout('main')",
|
||||
},
|
||||
{
|
||||
query: "call dolt_branch('-D', 'feat')",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 1,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 1,
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "stats stop/start",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y int, key (y,x))",
|
||||
"insert into xy values (0,0), (1,0), (2,0)",
|
||||
"call dolt_add('-A')",
|
||||
"call dolt_commit('-m', 'create xy')",
|
||||
"call dolt_checkout('-b', 'feat')",
|
||||
"call dolt_checkout('main')",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 2,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 2,
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_stop()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 2,
|
||||
Backing: "mydb",
|
||||
Active: false,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 2,
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 2,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 2,
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "stats purge",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y int, key (y,x))",
|
||||
"insert into xy values (0,0), (1,0), (2,0)",
|
||||
"call dolt_add('-A')",
|
||||
"call dolt_commit('-m', 'create xy')",
|
||||
"call dolt_checkout('-b', 'feat')",
|
||||
"call dolt_checkout('main')",
|
||||
"insert into xy values (3,0)",
|
||||
"call dolt_checkout('feat')",
|
||||
"insert into xy values (3,0)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "call dolt_stats_purge()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 0,
|
||||
Backing: "mydb",
|
||||
Active: false,
|
||||
StorageBucketCnt: 0,
|
||||
CachedBucketCnt: 0,
|
||||
CachedBoundCnt: 0,
|
||||
CachedTemplateCnt: 0,
|
||||
StatCnt: 0,
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_restart()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_wait()",
|
||||
},
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{
|
||||
{dprocedures.StatsInfo{
|
||||
DbCnt: 2,
|
||||
Backing: "mydb",
|
||||
Active: true,
|
||||
StorageBucketCnt: 2,
|
||||
CachedBucketCnt: 2,
|
||||
CachedBoundCnt: 2,
|
||||
CachedTemplateCnt: 2,
|
||||
StatCnt: 2,
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "null bounds",
|
||||
setup: []string{
|
||||
"create table xy (x int primary key, y int, key (y))",
|
||||
"insert into xy values (0,NULL), (1,0), (2,0)",
|
||||
"CREATE table xyz (x bigint primary key, y varchar(500), z bigint, key(x, z));",
|
||||
"insert into xyz values (0,0,NULL), (1,1,0), (2,2,0)",
|
||||
},
|
||||
assertions: []assertion{
|
||||
{
|
||||
query: "call dolt_stats_info('--short')",
|
||||
res: []sql.Row{{dprocedures.StatsInfo{
|
||||
DbCnt: 1,
|
||||
Active: true,
|
||||
StorageBucketCnt: 4,
|
||||
CachedBucketCnt: 4,
|
||||
CachedBoundCnt: 4,
|
||||
CachedTemplateCnt: 4,
|
||||
StatCnt: 2,
|
||||
Backing: "mydb",
|
||||
}}},
|
||||
},
|
||||
{
|
||||
query: "select index_name, null_count from dolt_statistics",
|
||||
res: []sql.Row{{"primary", uint64(0)}, {"y", uint64(1)}, {"primary", uint64(0)}, {"x", uint64(1)}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range scripts {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
bthreads := sql.NewBackgroundThreads()
|
||||
ctx, sqlEng, sc := emptySetup(t, bthreads, false, false)
|
||||
|
||||
defer sqlEng.Close()
|
||||
|
||||
require.NoError(t, sc.Restart())
|
||||
|
||||
//sc.Debug = true
|
||||
|
||||
for _, s := range tt.setup {
|
||||
require.NoError(t, executeQuery(ctx, sqlEng, s))
|
||||
}
|
||||
|
||||
require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
|
||||
require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_flush()"))
|
||||
|
||||
for i, a := range tt.assertions {
|
||||
if sc.Debug {
|
||||
log.Println(a.query)
|
||||
}
|
||||
rows, err := executeQueryResults(ctx, sqlEng, a.query)
|
||||
if a.err != "" {
|
||||
require.Equal(t, a.err, err.Error())
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
if a.res != nil {
|
||||
cmp, exp := normalize(rows, a.res)
|
||||
require.Equal(t, exp, cmp, "query no "+strconv.Itoa(i)+" failed: "+a.query)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func normalize(cmp, exp []sql.Row) ([]sql.Row, []sql.Row) {
|
||||
for i, r := range exp {
|
||||
for j, v := range r {
|
||||
if _, ok := v.(dprocedures.StatsInfo); ok {
|
||||
if strSi, ok := cmp[i][j].(string); ok {
|
||||
si := dprocedures.StatsInfo{}
|
||||
if err := json.Unmarshal([]byte(strSi), &si); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
si.GenCnt = 0
|
||||
cmp[i][j] = si
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return cmp, exp
|
||||
}
|
||||
@@ -0,0 +1,550 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
"github.com/dolthub/go-mysql-server/sql/types"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// ErrIncompatibleVersion is returned when on-disk statistics were written
// with a different stats schema version than this client reads.
var ErrIncompatibleVersion = errors.New("client stats version mismatch")

// StatsKv is the storage interface for statistics artifacts: histogram
// buckets keyed by (chunk hash, index prefix length), statistic templates,
// and cached chunk upper-bound rows.
type StatsKv interface {
	// PutBucket stores the histogram bucket |b| for chunk |h|; |tupB|
	// describes the index row layout used for encoding.
	PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error
	// GetBucket returns the bucket for |h|, reporting whether it was found.
	GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error)
	// GetTemplate returns the cached statistic template for |key|.
	GetTemplate(key templateCacheKey) (stats.Statistic, bool)
	// PutTemplate caches |stat| as the template for |key|.
	PutTemplate(key templateCacheKey, stat stats.Statistic)
	// GetBound returns the cached upper-bound row for chunk |h| at index
	// prefix length |len|.
	GetBound(h hash.Hash, len int) (sql.Row, bool)
	// PutBound caches |r| as the upper-bound row for chunk |h| at index
	// prefix length |l|.
	PutBound(h hash.Hash, r sql.Row, l int)
	// Flush persists pending writes, returning the stored bucket count.
	Flush(ctx context.Context) (int, error)
	// Len returns the number of cached buckets.
	Len() int
	// GcGen returns the KV's garbage-collection generation counter.
	GcGen() uint64
}

// Compile-time interface conformance checks.
var _ StatsKv = (*prollyStats)(nil)
var _ StatsKv = (*memStats)(nil)
var _ StatsKv = (*StatsController)(nil)
|
||||
|
||||
func NewMemStats() *memStats {
|
||||
return &memStats{
|
||||
mu: sync.Mutex{},
|
||||
buckets: make(map[bucketKey]*stats.Bucket),
|
||||
templates: make(map[templateCacheKey]stats.Statistic),
|
||||
bounds: make(map[bucketKey]sql.Row),
|
||||
gcFlusher: make(map[*val.TupleBuilder][]bucketKey),
|
||||
}
|
||||
}
|
||||
|
||||
// memStats is an in-memory StatsKv with no durable backing storage.
// All fields are guarded by mu.
type memStats struct {
	mu    sync.Mutex
	gcGen uint64 // GC generation; GcMark rejects sources from a newer gen

	buckets   map[bucketKey]*stats.Bucket          // histogram buckets by (hash, prefix len)
	templates map[templateCacheKey]stats.Statistic // cached statistic templates
	bounds    map[bucketKey]sql.Row                // chunk upper-bound rows

	// gcFlusher tracks state require to lazily swap from
	// a *memStats to *prollyStats
	gcFlusher map[*val.TupleBuilder][]bucketKey
}
|
||||
|
||||
// StorageCnt reports the number of buckets in durable storage; a pure
// in-memory KV has none, so it always returns 0.
func (m *memStats) StorageCnt(context.Context) (int, error) {
	return 0, nil
}
|
||||
|
||||
func (m *memStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
t, ok := m.templates[key]
|
||||
if !ok {
|
||||
return stats.Statistic{}, false
|
||||
}
|
||||
return t, true
|
||||
}
|
||||
|
||||
func (m *memStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
m.templates[key] = stat
|
||||
}
|
||||
|
||||
// bucketKey uniquely identifies a bucket: a 20-byte chunk hash followed by
// a big-endian uint16 index prefix length. The same chunk can back indexes
// with different prefix lengths, so the hash alone is not unique.
type bucketKey [22]byte

// getBucketKey packs chunk hash |h| and index prefix length |l| into a
// bucketKey. |l| is truncated to 16 bits.
func getBucketKey(h hash.Hash, l int) bucketKey {
	var k bucketKey
	copy(k[:hash.ByteLen], h[:])
	binary.BigEndian.PutUint16(k[hash.ByteLen:], uint16(l))
	return k
}
|
||||
|
||||
func (m *memStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
k := getBucketKey(h, l)
|
||||
r, ok := m.bounds[k]
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
return r, true
|
||||
}
|
||||
|
||||
func (m *memStats) PutBound(h hash.Hash, r sql.Row, l int) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
k := getBucketKey(h, l)
|
||||
m.bounds[k] = r
|
||||
}
|
||||
|
||||
// GcMark copies the live buckets |buckets| (backed by |nodes|) from the
// |from| KV into this one during a GC sweep, and records their keys under
// |tb| in gcFlusher so a prollyStats can later persist them. It returns
// false without copying anything when |from| belongs to a newer GC
// generation than this KV.
func (m *memStats) GcMark(from StatsKv, nodes []tree.Node, buckets []*stats.Bucket, idxLen int, tb *val.TupleBuilder) bool {
	if from.GcGen() > m.GcGen() {
		return false
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	for i, b := range buckets {
		h := nodes[i].HashOf()
		k := getBucketKey(h, idxLen)
		if i == 0 {
			// Only the first chunk's bound is carried over.
			// NOTE(review): a miss in |from| stores a nil row here —
			// confirm callers treat a nil bound the same as an absent one.
			m.bounds[k], _ = from.GetBound(h, idxLen)
		}
		m.buckets[k] = b
		m.gcFlusher[tb] = append(m.gcFlusher[tb], k)
	}
	return true
}
|
||||
|
||||
// GcGen returns this KV's garbage-collection generation counter.
func (m *memStats) GcGen() uint64 {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.gcGen
}

// Len returns the number of buckets currently cached in memory.
func (m *memStats) Len() int {
	m.mu.Lock()
	defer m.mu.Unlock()
	return len(m.buckets)
}
|
||||
|
||||
// PutBucket stores |b| keyed by chunk hash |h| and the width of the
// bucket's bound row. The tuple builder is unused by the in-memory KV.
func (m *memStats) PutBucket(_ context.Context, h hash.Hash, b *stats.Bucket, _ *val.TupleBuilder) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	// NOTE(review): the key width here is len(b.BoundVal) while GetBucket
	// keys on tupB.Desc.Count() — these are presumed equal; verify.
	k := getBucketKey(h, len(b.BoundVal))
	m.buckets[k] = b
	return nil
}

// GetBucket returns the bucket for chunk |h| whose key width matches
// |tupB|'s descriptor, reporting whether it was found. An empty hash is
// treated as a miss, not an error.
func (m *memStats) GetBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if h.IsEmpty() {
		return nil, false, nil
	}
	k := getBucketKey(h, tupB.Desc.Count())
	b, ok := m.buckets[k]
	return b, ok, nil
}
|
||||
|
||||
func (m *memStats) Flush(_ context.Context) (int, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if m.gcFlusher != nil {
|
||||
m.gcFlusher = nil
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// NewProllyStats creates a prollyStats whose durable map lives in
// |destDb|'s node store, using the dolt stats table schema for key/value
// tuple layouts. The returned KV starts with an empty map and an empty
// in-memory cache.
func NewProllyStats(ctx context.Context, destDb dsess.SqlDatabase) (*prollyStats, error) {
	sch := schema.StatsTableDoltSchema
	kd, vd := sch.GetMapDescriptors(nil)

	keyBuilder := val.NewTupleBuilder(kd)
	valueBuilder := val.NewTupleBuilder(vd)
	newMap, err := prolly.NewMapFromTuples(ctx, destDb.DbData().Ddb.NodeStore(), kd, vd)
	if err != nil {
		return nil, err
	}

	return &prollyStats{
		mu:     sync.Mutex{},
		destDb: destDb,
		kb:     keyBuilder,
		vb:     valueBuilder,
		m:      newMap.Mutate(),
		mem:    NewMemStats(),
	}, nil
}
|
||||
|
||||
// prollyStats is a StatsKv that layers a memStats read/write cache over a
// durable prolly map stored in destDb.
type prollyStats struct {
	mu     sync.Mutex // guards the shared tuple builders and m
	destDb dsess.SqlDatabase
	kb, vb *val.TupleBuilder // shared key/value builders; reuse requires mu
	m      *prolly.MutableMap
	// newM: presumably a staging map used while rotating backing storage —
	// TODO confirm; no use is visible in this file.
	newM *prolly.MutableMap
	mem  *memStats
}
|
||||
|
||||
// Len returns the number of buckets in the in-memory cache.
func (p *prollyStats) Len() int {
	return p.mem.Len()
}

// GetTemplate delegates template reads to the in-memory cache.
func (p *prollyStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
	return p.mem.GetTemplate(key)
}

// PutTemplate delegates template writes to the in-memory cache.
func (p *prollyStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
	p.mem.PutTemplate(key, stat)
}

// GetBound delegates bound reads to the in-memory cache.
func (p *prollyStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
	return p.mem.GetBound(h, l)
}

// PutBound delegates bound writes to the in-memory cache.
func (p *prollyStats) PutBound(h hash.Hash, r sql.Row, l int) {
	p.mem.PutBound(h, r, l)
}
|
||||
|
||||
// PutBucket writes |b| to the in-memory cache and then to the durable
// prolly map so the bucket survives a flush.
func (p *prollyStats) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error {
	if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil {
		return err
	}

	// Encode key and value before taking p.mu here; the encoders acquire
	// the lock internally.
	k, err := p.encodeHash(h, tupB.Desc.Count())
	if err != nil {
		return err
	}
	v, err := p.encodeBucket(ctx, b, tupB)
	if err != nil {
		return err
	}

	p.mu.Lock()
	defer p.mu.Unlock()
	return p.m.Put(ctx, k, v)
}
|
||||
|
||||
func (p *prollyStats) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
|
||||
if h.IsEmpty() {
|
||||
return nil, false, nil
|
||||
}
|
||||
b, ok, err := p.mem.GetBucket(ctx, h, tupB)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
if ok {
|
||||
return b, true, nil
|
||||
}
|
||||
|
||||
// missing bucket and not GC'ing, try disk
|
||||
k, err := p.encodeHash(h, tupB.Desc.Count())
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
var v val.Tuple
|
||||
err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error {
|
||||
if key != nil {
|
||||
ok = true
|
||||
v = value
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if !ok || err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
b, err = p.decodeBucketTuple(ctx, v, tupB)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
p.mem.PutBucket(ctx, h, b, tupB)
|
||||
return b, true, nil
|
||||
}
|
||||
|
||||
func (p *prollyStats) GcGen() uint64 {
|
||||
return p.mem.gcGen
|
||||
}
|
||||
|
||||
func (p *prollyStats) LoadFromMem(ctx context.Context) error {
|
||||
p.mem.mu.Lock()
|
||||
defer p.mem.mu.Unlock()
|
||||
for tb, keys := range p.mem.gcFlusher {
|
||||
for _, key := range keys {
|
||||
b, ok := p.mem.buckets[key]
|
||||
if !ok {
|
||||
return fmt.Errorf("memory KV inconsistent, missing bucket for: %s", key)
|
||||
}
|
||||
tupK, err := p.encodeHash(hash.New(key[:hash.ByteLen]), tb.Desc.Count())
|
||||
tupV, err := p.encodeBucket(ctx, b, tb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := p.m.Put(ctx, tupK, tupV); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
p.mem.gcFlusher = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush persists all GC-marked memory buckets into the prolly map, writes
// the resulting map root to the database's "main" statistics ref, and
// returns the number of stored buckets.
func (p *prollyStats) Flush(ctx context.Context) (int, error) {
	if err := p.LoadFromMem(ctx); err != nil {
		return 0, err
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	flushedMap, err := p.m.Map(ctx)
	if err != nil {
		return 0, err
	}
	if err := p.destDb.DbData().Ddb.SetStatistics(ctx, "main", flushedMap.HashOf()); err != nil {
		return 0, err
	}

	// Continue editing on top of the just-flushed map.
	p.m = flushedMap.Mutate()

	cnt, err := flushedMap.Count()
	return cnt, err
}
|
||||
|
||||
func (p *prollyStats) encodeHash(h hash.Hash, len int) (val.Tuple, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.kb.PutInt64(0, int64(len))
|
||||
if err := p.kb.PutString(1, h.String()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return p.kb.Build(p.m.NodeStore().Pool()), nil
|
||||
}
|
||||
|
||||
func (p *prollyStats) decodeHashTuple(v val.Tuple) (int, hash.Hash, error) {
|
||||
l, ok := p.kb.Desc.GetInt64(0, v)
|
||||
hStr, ok := p.kb.Desc.GetString(1, v)
|
||||
if !ok {
|
||||
return 0, hash.Hash{}, fmt.Errorf("unexpected null hash")
|
||||
}
|
||||
return int(l), hash.Parse(hStr), nil
|
||||
}
|
||||
|
||||
// decodeBucketTuple decodes a stats-table value tuple into a histogram
// bucket. The value layout (see encodeBucket) is:
//
//	0: schema version, 1: row count, 2: distinct count, 3: null count,
//	4: encoded upper-bound row, 5: bound count, 6..9: encoded MCV rows,
//	10: comma-separated MCV counts
//
// |tupB| describes the index row layout used to decode the bound and MCV
// rows. Returns ErrIncompatibleVersion when the stored version differs.
func (p *prollyStats) decodeBucketTuple(ctx context.Context, v val.Tuple, tupB *val.TupleBuilder) (*stats.Bucket, error) {
	var row []interface{}
	for i := 0; i < p.vb.Desc.Count(); i++ {
		f, err := tree.GetField(ctx, p.vb.Desc, i, v, p.m.NodeStore())
		if err != nil {
			return nil, err
		}
		row = append(row, f)
	}

	version := row[0]
	// NOTE(review): |version| is an interface{}; this equality presumes
	// schema.StatsVersion has the same dynamic type as the stored field —
	// confirm, otherwise the check would always fail.
	if version != schema.StatsVersion {
		return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
	}
	rowCount := row[1].(int64)
	distinctCount := row[2].(int64)
	nullCount := row[3].(int64)
	boundRowStr := row[4].(string)
	upperBoundCnt := row[5].(int64)
	mcvCountsStr := row[10].(string)

	boundRow, err := DecodeRow(ctx, p.m.NodeStore(), boundRowStr, tupB)
	if err != nil {
		return nil, err
	}

	// MCV counts are stored as a comma-separated decimal string.
	var mcvCnts []uint64
	if len(mcvCountsStr) > 0 {
		for _, c := range strings.Split(mcvCountsStr, ",") {
			cnt, err := strconv.ParseInt(c, 10, 64)
			if err != nil {
				return nil, err
			}
			mcvCnts = append(mcvCnts, uint64(cnt))
		}
	}

	// Decode one MCV row per recorded count; empty slots stay nil.
	mcvs := make([]sql.Row, len(mcvCnts))
	for i, v := range row[6 : 6+len(mcvCnts)] {
		if v != nil && v != "" {
			row, err := DecodeRow(ctx, p.m.NodeStore(), v.(string), tupB)
			if err != nil {
				return nil, err
			}
			mcvs[i] = row
		}
	}

	return &stats.Bucket{
		RowCnt:      uint64(rowCount),
		DistinctCnt: uint64(distinctCount),
		NullCnt:     uint64(nullCount),
		McvsCnt:     mcvCnts,
		BoundCnt:    uint64(upperBoundCnt),
		BoundVal:    boundRow,
		McvVals:     mcvs,
	}, nil
}
|
||||
|
||||
// mcvTypes supplies SQL types for stringifying MCV counts; at most four
// MCVs are stored per bucket.
var mcvTypes = []sql.Type{types.Int16, types.Int16, types.Int16, types.Int16}

// encodeBucket serializes |b| into a stats-table value tuple (the inverse
// of decodeBucketTuple). The shared value builder is guarded by p.mu.
// NOTE(review): vb.PutString errors are unchecked here while kb.PutString
// is checked in encodeHash — confirm this is intentional.
func (p *prollyStats) encodeBucket(ctx context.Context, b *stats.Bucket, tupB *val.TupleBuilder) (val.Tuple, error) {
	p.mu.Lock()
	defer p.mu.Unlock()

	p.vb.PutInt64(0, schema.StatsVersion)
	p.vb.PutInt64(1, int64(b.RowCount()))
	p.vb.PutInt64(2, int64(b.DistinctCount()))
	p.vb.PutInt64(3, int64(b.NullCount()))
	boundRow, err := EncodeRow(ctx, p.m.NodeStore(), b.UpperBound(), tupB)
	if err != nil {
		return nil, err
	}
	p.vb.PutString(4, string(boundRow))
	p.vb.PutInt64(5, int64(b.BoundCount()))
	for i, r := range b.Mcvs() {
		mcvRow, err := EncodeRow(ctx, p.m.NodeStore(), r, tupB)
		if err != nil {
			return nil, err
		}
		p.vb.PutString(6+i, string(mcvRow))
	}
	// MCV counts are stringified into field 10 as a comma-separated list.
	var mcvCntsRow sql.Row
	for _, v := range b.McvCounts() {
		mcvCntsRow = append(mcvCntsRow, int(v))
	}
	p.vb.PutString(10, stats.StringifyKey(mcvCntsRow, mcvTypes[:len(mcvCntsRow)]))

	return p.vb.Build(p.m.NodeStore().Pool()), nil
}
|
||||
|
||||
func (p *prollyStats) NewEmpty(ctx context.Context) (StatsKv, error) {
|
||||
kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors(nil)
|
||||
newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := newMap.Mutate()
|
||||
return &prollyStats{m: m, destDb: p.destDb, kb: p.kb, vb: p.vb}, nil
|
||||
}
|
||||
|
||||
// EncodeRow serializes |r| into a tuple laid out by |tb|, skipping nil
// fields (they stay NULL in the tuple).
// NOTE(review): assumes len(r) >= tb.Desc.Count(); a shorter row panics —
// confirm callers guarantee this.
func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
	for i := range tb.Desc.Count() {
		v := r[i]
		if v == nil {
			continue
		}
		if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
			return nil, err
		}
	}
	return tb.Build(ns.Pool()), nil
}
|
||||
|
||||
func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
|
||||
tup := []byte(s)
|
||||
r := make(sql.Row, tb.Desc.Count())
|
||||
var err error
|
||||
for i, _ := range r {
|
||||
r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// PutBucket stores a bucket in the controller's active KV under sc.mu,
// which also guards swaps of sc.kv itself.
func (sc *StatsController) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	return sc.kv.PutBucket(ctx, h, b, tupB)
}

// GetBucket reads a bucket from the controller's active KV.
func (sc *StatsController) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	return sc.kv.GetBucket(ctx, h, tupB)
}

// GetTemplate reads a statistic template from the active KV.
func (sc *StatsController) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	return sc.kv.GetTemplate(key)
}

// PutTemplate caches a statistic template in the active KV.
func (sc *StatsController) PutTemplate(key templateCacheKey, stat stats.Statistic) {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	sc.kv.PutTemplate(key, stat)
}

// GetBound reads a chunk upper-bound row from the active KV.
func (sc *StatsController) GetBound(h hash.Hash, len int) (sql.Row, bool) {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	return sc.kv.GetBound(h, len)
}

// PutBound caches a chunk upper-bound row in the active KV.
func (sc *StatsController) PutBound(h hash.Hash, r sql.Row, l int) {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	sc.kv.PutBound(h, r, l)
}
|
||||
|
||||
// Flush persists the controller's active KV using a freshly generated
// session context, then signals flush listeners. The deferred calls run
// LIFO: signalListener, SessionCommandEnd, then SessionEnd.
func (sc *StatsController) Flush(ctx context.Context) (int, error) {
	sqlCtx, err := sc.ctxGen(ctx)
	if err != nil {
		return 0, err
	}
	defer sql.SessionEnd(sqlCtx.Session)
	sql.SessionCommandBegin(sqlCtx.Session)
	defer sql.SessionCommandEnd(sqlCtx.Session)

	sc.mu.Lock()
	defer sc.mu.Unlock()
	defer sc.signalListener(leFlush)
	return sc.kv.Flush(sqlCtx)
}
|
||||
|
||||
// Len returns the active KV's cached bucket count.
func (sc *StatsController) Len() int {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	return sc.kv.Len()
}

// GcGen returns the active KV's garbage-collection generation.
func (sc *StatsController) GcGen() uint64 {
	sc.mu.Lock()
	defer sc.mu.Unlock()
	return sc.kv.GcGen()
}
|
||||
@@ -0,0 +1,200 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/store/chunks"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly/message"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
func TestProllyKv(t *testing.T) {
|
||||
threads := sql.NewBackgroundThreads()
|
||||
prollyKv := newTestProllyKv(t, threads)
|
||||
|
||||
h := hash.Parse(strings.Repeat("a", hash.StringLen))
|
||||
h2 := hash.Parse(strings.Repeat("b", hash.StringLen))
|
||||
k := getBucketKey(h, 2)
|
||||
|
||||
tupB := val.NewTupleBuilder(val.NewTupleDescriptor(
|
||||
val.Type{Enc: val.Int64Enc, Nullable: true},
|
||||
val.Type{Enc: val.StringEnc, Nullable: true},
|
||||
))
|
||||
|
||||
t.Run("TestBoundsRoundTrip", func(t *testing.T) {
|
||||
exp := sql.Row{1, 1}
|
||||
prollyKv.PutBound(h, exp, 2)
|
||||
cmp, ok := prollyKv.GetBound(h, 2)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, exp, cmp)
|
||||
|
||||
_, ok = prollyKv.GetBound(h2, 2)
|
||||
require.False(t, ok)
|
||||
})
|
||||
|
||||
t.Run("TestTemplatesRoundTrip", func(t *testing.T) {
|
||||
exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}}
|
||||
key := templateCacheKey{
|
||||
h: h,
|
||||
idxName: "PRIMARY",
|
||||
}
|
||||
prollyKv.PutTemplate(key, exp)
|
||||
cmp, ok := prollyKv.GetTemplate(key)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, exp, cmp)
|
||||
|
||||
key2 := templateCacheKey{
|
||||
h: h2,
|
||||
idxName: "PRIMARY",
|
||||
}
|
||||
_, ok = prollyKv.GetTemplate(key2)
|
||||
require.False(t, ok)
|
||||
})
|
||||
t.Run("TestBucketsRoundTrip", func(t *testing.T) {
|
||||
exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
|
||||
err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
|
||||
require.NoError(t, err)
|
||||
cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
|
||||
require.NoError(t, err)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, exp, cmp)
|
||||
|
||||
// delete from memory, should pull from disk when |tupB| supplied
|
||||
delete(prollyKv.mem.buckets, k)
|
||||
|
||||
cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB)
|
||||
require.NoError(t, err)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, exp.RowCnt, cmp.RowCnt)
|
||||
require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt)
|
||||
require.Equal(t, exp.NullCnt, cmp.NullCnt)
|
||||
require.Equal(t, exp.McvsCnt, cmp.McvsCnt)
|
||||
require.Equal(t, exp.McvVals[0], cmp.McvVals[0])
|
||||
require.Equal(t, exp.McvVals[1], cmp.McvVals[1])
|
||||
require.Equal(t, exp.McvVals[2], cmp.McvVals[2])
|
||||
require.Equal(t, exp.McvVals[3], cmp.McvVals[3])
|
||||
require.Equal(t, exp.BoundVal, cmp.BoundVal)
|
||||
require.Equal(t, exp.BoundCnt, cmp.BoundCnt)
|
||||
})
|
||||
t.Run("TestNilMcvsRoundTrip", func(t *testing.T) {
|
||||
exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}}).(*stats.Bucket)
|
||||
err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
|
||||
|
||||
delete(prollyKv.mem.buckets, k)
|
||||
|
||||
cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
|
||||
require.NoError(t, err)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, exp.RowCnt, cmp.RowCnt)
|
||||
require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt)
|
||||
require.Equal(t, exp.NullCnt, cmp.NullCnt)
|
||||
require.Equal(t, exp.McvsCnt, cmp.McvsCnt)
|
||||
require.Equal(t, len(exp.McvVals), len(cmp.McvVals))
|
||||
require.Equal(t, exp.McvVals[0], cmp.McvVals[0])
|
||||
require.Equal(t, exp.McvVals[1], cmp.McvVals[1])
|
||||
require.Equal(t, exp.BoundVal, cmp.BoundVal)
|
||||
require.Equal(t, exp.BoundCnt, cmp.BoundCnt)
|
||||
})
|
||||
t.Run("TestGcGenBlocking", func(t *testing.T) {
|
||||
to := NewMemStats()
|
||||
from := NewMemStats()
|
||||
from.gcGen = 1
|
||||
require.False(t, to.GcMark(from, nil, nil, 0, nil))
|
||||
})
|
||||
t.Run("TestGcMarkFlush", func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
bthreads := sql.NewBackgroundThreads()
|
||||
defer bthreads.Shutdown()
|
||||
prev := NewMemStats()
|
||||
nodes1, bucks1 := testNodes(t, 10, 1)
|
||||
nodes2, bucks2 := testNodes(t, 10, 2)
|
||||
nodes3, bucks3 := testNodes(t, 10, 3)
|
||||
for i := range nodes1 {
|
||||
require.NoError(t, prev.PutBucket(ctx, nodes1[i].HashOf(), bucks1[i], tupB))
|
||||
}
|
||||
for i := range nodes2 {
|
||||
require.NoError(t, prev.PutBucket(ctx, nodes2[i].HashOf(), bucks2[i], tupB))
|
||||
}
|
||||
for i := range nodes3 {
|
||||
require.NoError(t, prev.PutBucket(ctx, nodes3[i].HashOf(), bucks3[i], tupB))
|
||||
}
|
||||
|
||||
require.Equal(t, 30, prev.Len())
|
||||
|
||||
to := NewMemStats()
|
||||
require.True(t, to.GcMark(prev, nodes1, bucks1, 2, tupB))
|
||||
require.True(t, to.GcMark(prev, nodes2, bucks2, 2, tupB))
|
||||
|
||||
require.Equal(t, 1, len(to.gcFlusher))
|
||||
require.Equal(t, 20, len(to.gcFlusher[tupB]))
|
||||
require.Equal(t, 20, to.Len())
|
||||
|
||||
kv := newTestProllyKv(t, bthreads)
|
||||
kv.mem = to
|
||||
cnt, err := kv.Flush(ctx)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 20, cnt)
|
||||
})
|
||||
}
|
||||
|
||||
// newTestProllyKv builds a prollyStats over a throwaway test environment:
// it starts an engine, creates and selects database "mydb", and backs the
// KV with the first database the provider reports.
func newTestProllyKv(t *testing.T, threads *sql.BackgroundThreads) *prollyStats {
	dEnv := dtestutils.CreateTestEnv()

	sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
	ctx.Session.SetClient(sql.Client{
		User:    "billy boy",
		Address: "bigbillie@fake.horse",
	})
	require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb"))
	require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))

	startDbs := sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx)

	// NOTE(review): assumes AllDatabases lists "mydb" first — confirm.
	kv, err := NewProllyStats(ctx, startDbs[0].(dsess.SqlDatabase))
	require.NoError(t, err)

	return kv
}
|
||||
|
||||
func testNodes(t *testing.T, cnt int, seed uint8) ([]tree.Node, []*stats.Bucket) {
|
||||
ts := &chunks.TestStorage{}
|
||||
ns := tree.NewNodeStore(ts.NewViewWithFormat(types.Format_DOLT.VersionString()))
|
||||
s := message.NewBlobSerializer(ns.Pool())
|
||||
|
||||
var nodes []tree.Node
|
||||
var buckets []*stats.Bucket
|
||||
for i := range cnt {
|
||||
vals := [][]byte{{uint8(i), seed, 1, 1}}
|
||||
msg := s.Serialize([][]byte{{0}}, vals, []uint64{1}, 0)
|
||||
node, _, err := tree.NodeFromBytes(msg)
|
||||
require.NoError(t, err)
|
||||
nodes = append(nodes, node)
|
||||
buckets = append(buckets, &stats.Bucket{RowCnt: uint64(i), BoundVal: sql.Row{i, "col2"}})
|
||||
}
|
||||
return nodes, buckets
|
||||
}
|
||||
@@ -1,535 +0,0 @@
|
||||
// Copyright 2023 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
)
|
||||
|
||||
// ErrFailedToLoad signals that statistics could not be loaded.
var ErrFailedToLoad = errors.New("failed to load statistics")
|
||||
|
||||
// indexMeta describes the pending refresh work for a single index: which
// existing histogram chunks to keep or drop, and which new tree nodes still
// need buckets built.
type indexMeta struct {
	qual     sql.StatQualifier
	cols     []string
	newNodes []tree.Node
	// updateOrdinals are [start, stop] tuples for each update chunk
	updateOrdinals []updateOrdinal
	keepChunks     []sql.HistogramBucket
	dropChunks     []sql.HistogramBucket
	allAddrs       []hash.Hash
}
|
||||
|
||||
// updateOrdinal bounds the row ordinals spanned by one update chunk.
type updateOrdinal struct {
	start, stop uint64
}
|
||||
|
||||
func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider {
|
||||
return &Provider{
|
||||
pro: pro,
|
||||
sf: sf,
|
||||
mu: &sync.Mutex{},
|
||||
statDbs: make(map[string]Database),
|
||||
autoCtxCancelers: make(map[string]context.CancelFunc),
|
||||
analyzeCtxCancelers: make(map[string]context.CancelFunc),
|
||||
status: make(map[string]string),
|
||||
lockedTables: make(map[string]bool),
|
||||
}
|
||||
}
|
||||
|
||||
// Provider is the engine interface for reading and writing index statistics.
// Each database has its own statistics table that all tables/indexes in a db
// share.
type Provider struct {
	mu  *sync.Mutex // guards all maps below
	pro *sqle.DoltDatabaseProvider
	sf  StatsFactory
	// statDbs maps a (lowercased) database name to its stats database.
	statDbs map[string]Database
	// autoCtxCancelers stop per-database background refresh threads.
	autoCtxCancelers map[string]context.CancelFunc
	// analyzeCtxCancelers presumably stop in-flight ANALYZE jobs — usage
	// not visible here; confirm against callers.
	analyzeCtxCancelers map[string]context.CancelFunc
	// starter is the hook used to (re)start refresh threads.
	starter sqle.InitDatabaseHook
	// status holds the latest human-readable status message per database.
	status map[string]string
	// lockedTables serializes stats updates keyed by "branch.db.table".
	lockedTables map[string]bool
}
|
||||
|
||||
// each database has one statistics table that is a collection of the
// table stats in the database
type dbToStats struct {
	mu     *sync.Mutex
	dbName string
	// stats maps a qualified index name to its latest statistics.
	stats         map[sql.StatQualifier]*DoltStats
	statsDatabase Database
	// latestTableHashes appears to record the last-seen hash per table
	// name; usage is not visible in this file section — verify at callers.
	latestTableHashes map[string]hash.Hash
}
|
||||
|
||||
func newDbStats(dbName string) *dbToStats {
|
||||
return &dbToStats{
|
||||
mu: &sync.Mutex{},
|
||||
dbName: dbName,
|
||||
stats: make(map[sql.StatQualifier]*DoltStats),
|
||||
latestTableHashes: make(map[string]hash.Hash),
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time check that Provider implements sql.StatsProvider.
var _ sql.StatsProvider = (*Provider)(nil)
|
||||
|
||||
func (p *Provider) Close() error {
|
||||
var lastErr error
|
||||
for _, db := range p.statDbs {
|
||||
if err := db.Close(); err != nil {
|
||||
lastErr = err
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func (p *Provider) TryLockForUpdate(branch, db, table string) bool {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
|
||||
if ok := p.lockedTables[lockId]; ok {
|
||||
return false
|
||||
}
|
||||
p.lockedTables[lockId] = true
|
||||
return true
|
||||
}
|
||||
|
||||
func (p *Provider) UnlockTable(branch, db, table string) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
|
||||
p.lockedTables[lockId] = false
|
||||
return
|
||||
}
|
||||
|
||||
// StartRefreshThread runs the configured starter hook for the named
// database and records success or failure in the status map.
// NOTE(review): the unchecked type assertion panics if pro is not a
// *sqle.DoltDatabaseProvider — confirm all callers pass that concrete type.
func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error {
	err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db)

	if err != nil {
		p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error()))
		return err
	}
	p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name))
	return nil
}
|
||||
|
||||
// SetStarter installs the hook used to (re)start per-database refresh threads.
func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) {
	p.starter = hook
}
|
||||
|
||||
func (p *Provider) CancelRefreshThread(dbName string) {
|
||||
p.mu.Lock()
|
||||
if cancel, ok := p.autoCtxCancelers[dbName]; ok {
|
||||
cancel()
|
||||
}
|
||||
p.mu.Unlock()
|
||||
p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName))
|
||||
|
||||
}
|
||||
|
||||
func (p *Provider) ThreadStatus(dbName string) string {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if msg, ok := p.status[dbName]; ok {
|
||||
return msg
|
||||
}
|
||||
return "no active stats thread"
|
||||
}
|
||||
|
||||
func (p *Provider) TrackedBranches(dbName string) []string {
|
||||
db, ok := p.getStatDb(dbName)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return db.Branches()
|
||||
|
||||
}
|
||||
|
||||
func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var schemaName string
|
||||
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
|
||||
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
|
||||
}
|
||||
|
||||
return p.GetTableDoltStats(ctx, branch, db, schemaName, table.Name())
|
||||
}
|
||||
|
||||
func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) {
|
||||
statDb, ok := p.getStatDb(db)
|
||||
if !ok || statDb == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if branch == "" {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
var err error
|
||||
branch, err = dSess.GetBranch()
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
var ret []sql.Statistic
|
||||
for _, qual := range statDb.ListStatQuals(branch) {
|
||||
if strings.EqualFold(db, qual.Database) && strings.EqualFold(schema, qual.Sch) && strings.EqualFold(table, qual.Tab) {
|
||||
stat, _ := statDb.GetStat(branch, qual)
|
||||
ret = append(ret, stat)
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (p *Provider) setStatDb(name string, db Database) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.statDbs[name] = db
|
||||
}
|
||||
|
||||
func (p *Provider) getStatDb(name string) (Database, bool) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
statDb, ok := p.statDbs[strings.ToLower(name)]
|
||||
return statDb, ok
|
||||
}
|
||||
|
||||
func (p *Provider) deleteStatDb(name string) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
delete(p.statDbs, strings.ToLower(name))
|
||||
}
|
||||
|
||||
func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error {
|
||||
statDb, ok := p.getStatDb(s.Qualifier().Db())
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
doltStat, err := DoltStatsFromSql(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db()))
|
||||
|
||||
return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat)
|
||||
}
|
||||
|
||||
func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) {
|
||||
statDb, ok := p.getStatDb(qual.Db())
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
return statDb.GetStat(branch, qual)
|
||||
}
|
||||
|
||||
func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) {
|
||||
stat, ok := p.getQualStats(ctx, qual)
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
return stat, true
|
||||
}
|
||||
|
||||
func (p *Provider) DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error {
|
||||
statDb, ok := p.getStatDb(db)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
p.status[db] = "dropped"
|
||||
|
||||
return statDb.DeleteBranchStats(ctx, branch, flush)
|
||||
}
|
||||
|
||||
func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error {
|
||||
statDb, ok := p.getStatDb(db)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
for _, branch := range statDb.Branches() {
|
||||
// remove provider access
|
||||
p.DropBranchDbStats(ctx, branch, db, flush)
|
||||
}
|
||||
|
||||
if flush {
|
||||
p.deleteStatDb(db)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error {
|
||||
statDb, ok := p.getStatDb(qual.Db())
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, ok := statDb.GetStat(branch, qual); ok {
|
||||
statDb.DeleteStats(ctx, branch, qual)
|
||||
p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String()))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Provider) UpdateStatus(db string, msg string) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
p.status[db] = msg
|
||||
}
|
||||
|
||||
func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
|
||||
statDb, ok := p.getStatDb(db)
|
||||
if !ok {
|
||||
return 0, sql.ErrDatabaseNotFound.New(db)
|
||||
}
|
||||
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var schemaName string
|
||||
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
|
||||
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
|
||||
}
|
||||
|
||||
priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
|
||||
if !ok {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return priStats.RowCount(), nil
|
||||
}
|
||||
|
||||
func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
|
||||
statDb, ok := p.getStatDb(db)
|
||||
if !ok {
|
||||
return 0, sql.ErrDatabaseNotFound.New(db)
|
||||
}
|
||||
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
branch, err := dSess.GetBranch()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var schemaName string
|
||||
if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
|
||||
schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
|
||||
}
|
||||
|
||||
priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
|
||||
if !ok {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return priStats.AvgSize(), nil
|
||||
}
|
||||
|
||||
func (p *Provider) Prune(ctx *sql.Context) error {
|
||||
dSess := dsess.DSessFromSess(ctx.Session)
|
||||
|
||||
for _, sqlDb := range p.pro.DoltDatabases() {
|
||||
dbName := strings.ToLower(sqlDb.Name())
|
||||
sqlDb, ok, err := dSess.Provider().SessionDatabase(ctx, dbName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
statDb, ok := p.getStatDb(dbName)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// Canceling refresh thread prevents background thread from
|
||||
// making progress. Prune should succeed.
|
||||
p.CancelRefreshThread(dbName)
|
||||
|
||||
tables, err := sqlDb.GetTableNames(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, branch := range statDb.Branches() {
|
||||
err := func() error {
|
||||
// function closure ensures safe defers
|
||||
var stats []sql.Statistic
|
||||
for _, t := range tables {
|
||||
// XXX: avoid races with ANALYZE with the table locks.
|
||||
// Either concurrent purge or analyze (or both) will fail.
|
||||
if !p.TryLockForUpdate(branch, dbName, t) {
|
||||
p.mu.Lock()
|
||||
fmt.Println(p.lockedTables)
|
||||
p.mu.Unlock()
|
||||
return fmt.Errorf("concurrent statistics update and prune; retry prune when update is finished")
|
||||
}
|
||||
defer p.UnlockTable(branch, dbName, t)
|
||||
|
||||
tableStats, err := p.GetTableDoltStats(ctx, branch, dbName, sqlDb.SchemaName(), t)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats = append(stats, tableStats...)
|
||||
}
|
||||
|
||||
if err := p.DropBranchDbStats(ctx, branch, dbName, true); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, s := range stats {
|
||||
ds, ok := s.(*DoltStats)
|
||||
if !ok {
|
||||
return fmt.Errorf("unexpected statistics type found: %T", s)
|
||||
}
|
||||
if err := statDb.SetStat(ctx, branch, ds.Qualifier(), ds); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := statDb.Flush(ctx, branch); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Purge deletes all statistics storage for every database — both in-memory
// state and the on-disk stats directory — then reloads a fresh baseline via
// p.Load. Table locks are taken so a concurrent ANALYZE fails rather than
// racing the purge.
func (p *Provider) Purge(ctx *sql.Context) error {
	for _, sqlDb := range p.pro.DoltDatabases() {
		dbName := strings.ToLower(sqlDb.Name())

		tables, err := sqlDb.GetTableNames(ctx)
		if err != nil {
			return err
		}

		var branches []string
		db, ok := p.getStatDb(dbName)
		if ok {
			// Canceling refresh thread prevents background thread from
			// making progress. Purge should succeed.
			p.CancelRefreshThread(dbName)

			branches = db.Branches()
			for _, branch := range branches {
				// closure scopes the per-branch defers so locks release
				// at the end of each branch, not the whole function
				err := func() error {
					for _, t := range tables {
						// XXX: avoid races with ANALYZE with the table locks.
						// Either concurrent purge or analyze (or both) will fail.
						if !p.TryLockForUpdate(branch, dbName, t) {
							return fmt.Errorf("concurrent statistics update and prune; retry purge when update is finished")
						}
						defer p.UnlockTable(branch, dbName, t)
					}

					err := p.DropBranchDbStats(ctx, branch, dbName, true)
					if err != nil {
						return fmt.Errorf("failed to drop stats: %w", err)
					}
					return nil
				}()
				if err != nil {
					return err
				}
			}
		}

		// if the database failed to load, we still want to delete the folder

		fs, err := p.pro.FileSystemForDatabase(dbName)
		if err != nil {
			return err
		}

		// remove the stats directory from the filesystem
		statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
		if err != nil {
			return err
		}

		if ok, _ := statsFs.Exists(""); ok {
			if err := statsFs.Delete("", true); err != nil {
				return err
			}
		}

		dropDbLoc, err := statsFs.Abs("")
		if err != nil {
			return err
		}

		// drop the deleted stats database from the singleton factory cache
		if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
			return err
		}
		if len(branches) == 0 {
			// if stats db was invalid on startup, recreate from baseline
			branches = p.getStatsBranches(ctx)
		}
		// NOTE(review): Load's error (if any) is not checked here — confirm
		// this best-effort reload is intentional.
		p.Load(ctx, fs, sqlDb, branches)
	}
	return nil
}
|
||||
@@ -0,0 +1,639 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package statspro
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/stats"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/prolly/tree"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// collectBatchSize caps how many new histogram buckets a single serialized
// job builds before yielding the worker queue back to other callers.
const collectBatchSize = 20
|
||||
|
||||
func (sc *StatsController) CollectOnce(ctx context.Context) (string, error) {
|
||||
genStart := sc.genCnt.Load()
|
||||
newStats, err := sc.newStatsForRoot(ctx, nil)
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return "", nil
|
||||
} else if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if ok, err := sc.trySwapStats(ctx, genStart, newStats, nil); err != nil || !ok {
|
||||
return "", err
|
||||
}
|
||||
return newStats.String(), nil
|
||||
}
|
||||
|
||||
func (sc *StatsController) runWorker(ctx context.Context) (err error) {
|
||||
var gcKv *memStats
|
||||
var newStats *rootStats
|
||||
gcTicker := time.NewTicker(sc.gcInterval)
|
||||
for {
|
||||
// This loops tries to update stats as long as context
|
||||
// is active. Thread contexts governs who "owns" the update
|
||||
// process. The generation counters ensure atomic swapping.
|
||||
|
||||
gcKv = nil
|
||||
genStart := sc.genCnt.Load()
|
||||
|
||||
select {
|
||||
case <-gcTicker.C:
|
||||
sc.setDoGc(false)
|
||||
default:
|
||||
}
|
||||
|
||||
if sc.gcIsSet() {
|
||||
gcKv = NewMemStats()
|
||||
gcKv.gcGen = genStart
|
||||
}
|
||||
|
||||
newStats, err = sc.newStatsForRoot(ctx, gcKv)
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
} else if err != nil {
|
||||
sc.descError("", err)
|
||||
}
|
||||
|
||||
if ok, err := sc.trySwapStats(ctx, genStart, newStats, gcKv); err != nil {
|
||||
if !ok {
|
||||
sc.descError("failed to swap stats", err)
|
||||
} else {
|
||||
sc.descError("swapped stats with flush failure", err)
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// is double check necessary?
|
||||
return context.Cause(ctx)
|
||||
default:
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// trySwapStats attempts to atomically publish newStats as the controller's
// current statistics. The swap succeeds only if the generation counter
// still equals prevGen, i.e. no competing update landed since collection
// started. When gcKv is non-nil, a successful swap also installs the
// freshly built KV and rotates on-disk storage. ok reports whether the swap
// happened; err can be non-nil even on success (flush/rotate failures).
func (sc *StatsController) trySwapStats(ctx context.Context, prevGen uint64, newStats *rootStats, gcKv *memStats) (ok bool, err error) {
	if newStats == nil {
		return false, fmt.Errorf("attempted to place a nil stats object")
	}
	sc.mu.Lock()
	defer sc.mu.Unlock()

	if ctx.Err() != nil {
		// final ctx check in critical section, avoid races on
		// stats after calling stop
		return false, context.Cause(ctx)
	}

	signal := leSwap
	// notify listeners only after a successful swap, reporting everything
	// that happened (swap / gc / flush) via the signal bitmask
	defer func() {
		if ok {
			sc.logger.Debugf("stats successful swap: %s\n", newStats.String())
			sc.signalListener(signal)
		}
	}()

	if sc.genCnt.CompareAndSwap(prevGen, prevGen+1) {
		// Replace stats and new Kv if no replacements happened
		// in-between.
		sc.Stats = newStats
		if gcKv != nil {
			signal |= leGc
			// The new KV has all buckets for the latest root stats;
			// a background job will swap the disk location and put
			// entries into a prolly tree.
			if prevGen != gcKv.GcGen() {
				err = fmt.Errorf("gc gen didn't match update gen")
				return
			}
			sc.doGc = false
			sc.gcCnt++
			sc.kv = gcKv
			ok = true
			if !sc.memOnly {
				// drop the lock while rotating storage on the serial
				// queue; the queue serializes against other KV work
				func() {
					sc.mu.Unlock()
					defer sc.mu.Lock()
					if err := sc.sq.DoSync(ctx, func() error {
						return sc.rotateStorage(ctx)
					}); err != nil {
						sc.descError("", err)
					}
				}()
			}
		}
		// Flush new changes to disk, unlocked
		if !sc.memOnly {
			func() {
				sc.mu.Unlock()
				defer sc.mu.Lock()
				if err := sc.sq.DoSync(ctx, func() error {
					_, err := sc.Flush(ctx)
					return err
				}); err != nil {
					sc.descError("", err)
				}
			}()
		}
		signal = signal | leFlush
		return true, nil
	}
	return false, nil
}
|
||||
|
||||
// newStatsForRoot collects fresh statistics for every table of every branch
// of every database visible to a new session, returning them as a rootStats.
// When gcKv is non-nil the collection also marks live buckets into it so the
// KV can be swapped at the next GC. Worker panics are recovered and
// converted to errors; per-database and per-branch failures are logged and
// skipped rather than aborting the whole pass.
func (sc *StatsController) newStatsForRoot(baseCtx context.Context, gcKv *memStats) (newStats *rootStats, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("issuer panicked running work: %s", r)
		}
		if err != nil {
			sc.descError("stats update interrupted", err)
		}
	}()

	ctx, err := sc.ctxGen(baseCtx)
	if err != nil {
		return nil, err
	}

	defer sql.SessionEnd(ctx.Session)

	dSess := dsess.DSessFromSess(ctx.Session)
	var dbs []sql.Database
	// session accesses are bracketed with SessionCommandBegin/End throughout
	func() {
		sql.SessionCommandBegin(ctx.Session)
		defer sql.SessionCommandEnd(ctx.Session)
		dbs = dSess.Provider().AllDatabases(ctx)
	}()
	newStats = newRootStats()
	for _, db := range dbs {
		sqlDb, ok := db.(sqle.Database)
		if !ok {
			continue
		}

		// branch listing runs on the serial queue to serialize session use
		var branches []ref.DoltRef
		if err := sc.sq.DoSync(ctx, func() error {
			sql.SessionCommandBegin(ctx.Session)
			defer sql.SessionCommandEnd(ctx.Session)
			ddb, ok := dSess.GetDoltDB(ctx, db.Name())
			if !ok {
				return fmt.Errorf("get dolt db dolt database not found %s", db.Name())
			}
			var err error // races with outer err
			branches, err = ddb.GetBranches(ctx)
			return err
		}); err != nil {
			return nil, err
		}

		for _, br := range branches {
			// this call avoids the chunkstore
			sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), br.GetPath(), br.GetPath()+"/"+sqlDb.AliasedName())
			if err != nil {
				sc.descError("revisionForBranch", err)
				continue
			}

			var schDbs []sql.DatabaseSchema
			if err := sc.sq.DoSync(ctx, func() error {
				sql.SessionCommandBegin(ctx.Session)
				defer sql.SessionCommandEnd(ctx.Session)
				schDbs, err = sqlDb.AllSchemas(ctx)
				return err
			}); err != nil {
				sc.descError("getDatabaseSchemas", err)
				continue
			}

			for _, sqlDb := range schDbs {
				// skip system schemas that carry no user statistics
				switch sqlDb.SchemaName() {
				case "dolt", "information_schema", "pg_catalog":
					continue
				}
				var tableNames []string
				if err := sc.sq.DoSync(ctx, func() error {
					sql.SessionCommandBegin(ctx.Session)
					defer sql.SessionCommandEnd(ctx.Session)
					tableNames, err = sqlDb.GetTableNames(ctx)
					return err
				}); err != nil {
					sc.descError("getTableNames", err)
					continue
				}

				newStats.DbCnt++

				for _, tableName := range tableNames {
					err := sc.updateTable(ctx, newStats, tableName, sqlDb.(dsess.SqlDatabase), gcKv)
					if err != nil {
						return nil, err
					}
				}
			}
		}
	}

	return newStats, nil
}
|
||||
|
||||
func (sc *StatsController) preexistingStats(k tableIndexesKey, h hash.Hash) ([]*stats.Statistic, bool) {
|
||||
sc.mu.Lock()
|
||||
defer sc.mu.Unlock()
|
||||
if sc.Stats.hashes[k].Equal(h) {
|
||||
return sc.Stats.stats[k], true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (sc *StatsController) finalizeHistogram(template stats.Statistic, buckets []*stats.Bucket, firstBound sql.Row) *stats.Statistic {
|
||||
template.LowerBnd = firstBound
|
||||
for _, b := range buckets {
|
||||
// accumulate counts
|
||||
template.RowCnt += b.RowCnt
|
||||
template.DistinctCnt += b.DistinctCnt
|
||||
template.NullCnt += b.NullCnt
|
||||
template.Hist = append(template.Hist, b)
|
||||
}
|
||||
return &template
|
||||
}
|
||||
|
||||
// collectIndexNodes builds (or fetches cached) histogram buckets for each
// given level node of prollyMap, returning the buckets, the index's
// first-row lower bound, and how many buckets were newly written. Work is
// batched onto the serial queue in groups of collectBatchSize so other
// queue users can interleave.
// NOTE(review): the DoSync that seeds lowerBound discards its error; on
// failure lowerBound stays nil — confirm downstream tolerates that.
func (sc *StatsController) collectIndexNodes(ctx *sql.Context, prollyMap prolly.Map, idxLen int, nodes []tree.Node) ([]*stats.Bucket, sql.Row, int, error) {
	updater := newBucketBuilder(sql.StatQualifier{}, idxLen, prollyMap.KeyDesc())
	keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen))

	// the histogram's lower bound is the first row under the first node;
	// cached by node hash in the KV
	firstNodeHash := nodes[0].HashOf()
	lowerBound, ok := sc.kv.GetBound(firstNodeHash, idxLen)
	if !ok {
		sc.sq.DoSync(ctx, func() error {
			sql.SessionCommandBegin(ctx.Session)
			defer sql.SessionCommandEnd(ctx.Session)
			var err error
			lowerBound, err = firstRowForIndex(ctx, idxLen, prollyMap, keyBuilder)
			if err != nil {
				return fmt.Errorf("get histogram bucket for node; %w", err)
			}
			if sc.Debug {
				log.Printf("put bound: %s: %v\n", firstNodeHash.String()[:5], lowerBound)
			}

			sc.kv.PutBound(firstNodeHash, lowerBound, idxLen)
			return nil
		})
	}

	var writes int
	var offset uint64
	for i := 0; i < len(nodes); {
		err := sc.sq.DoSync(ctx, func() error {
			sql.SessionCommandBegin(ctx.Session)
			defer sql.SessionCommandEnd(ctx.Session)

			newWrites := 0
			for i < len(nodes) && newWrites < collectBatchSize {
				n := nodes[i]
				i++

				treeCnt, err := n.TreeCount()
				if err != nil {
					return err
				}
				// each node covers the ordinal range [start, stop)
				start, stop := offset, offset+uint64(treeCnt)
				offset = stop

				if _, ok, err := sc.GetBucket(ctx, n.HashOf(), keyBuilder); err != nil {
					return err
				} else if ok {
					// bucket already cached for this node hash
					continue
				}

				writes++
				newWrites++

				updater.newBucket()

				// we read exclusive range [node first key, next node first key)
				iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
				if err != nil {
					return err
				}
				for {
					// stats key will be a prefix of the index key
					keyBytes, _, err := iter.Next(ctx)
					if errors.Is(err, io.EOF) {
						break
					} else if err != nil {
						return err
					}
					// build full key
					for i := range keyBuilder.Desc.Types {
						keyBuilder.PutRaw(i, keyBytes.GetField(i))
					}

					updater.add(ctx, keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
					keyBuilder.Recycle()
				}

				// finalize the aggregation
				newBucket, err := updater.finalize(ctx, prollyMap.NodeStore())
				if err != nil {
					return err
				}
				if err := sc.PutBucket(ctx, n.HashOf(), newBucket, keyBuilder); err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			return nil, nil, 0, err
		}
	}

	// collect the full bucket list (cached + newly written) in node order
	var buckets []*stats.Bucket
	for _, n := range nodes {
		newBucket, ok, err := sc.GetBucket(ctx, n.HashOf(), keyBuilder)
		if err != nil || !ok {
			sc.descError(fmt.Sprintf("missing histogram bucket for node %s", n.HashOf().String()[:5]), err)
			return nil, nil, 0, err
		}
		buckets = append(buckets, newBucket)
	}

	return buckets, lowerBound, writes, nil
}
|
||||
|
||||
func (sc *StatsController) updateTable(ctx *sql.Context, newStats *rootStats, tableName string, sqlDb dsess.SqlDatabase, gcKv *memStats) error {
|
||||
var err error
|
||||
var sqlTable *sqle.DoltTable
|
||||
var dTab *doltdb.Table
|
||||
if err := sc.sq.DoSync(ctx, func() error {
|
||||
sql.SessionCommandBegin(ctx.Session)
|
||||
defer sql.SessionCommandEnd(ctx.Session)
|
||||
sqlTable, dTab, err = GetLatestTable(ctx, tableName, sqlDb)
|
||||
return err
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
schemaName := sqlTable.DatabaseSchema().SchemaName()
|
||||
|
||||
tableKey := tableIndexesKey{
|
||||
db: strings.ToLower(sqlDb.AliasedName()),
|
||||
branch: strings.ToLower(sqlDb.Revision()),
|
||||
table: strings.ToLower(tableName),
|
||||
schema: strings.ToLower(schemaName),
|
||||
}
|
||||
|
||||
tableHash, err := dTab.HashOf()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if gcKv == nil {
|
||||
if stats, ok := sc.preexistingStats(tableKey, tableHash); ok {
|
||||
newStats.stats[tableKey] = stats
|
||||
newStats.hashes[tableKey] = tableHash
|
||||
newStats.TablesSkipped++
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
var indexes []sql.Index
|
||||
if err := sc.sq.DoSync(ctx, func() error {
|
||||
sql.SessionCommandBegin(ctx.Session)
|
||||
defer sql.SessionCommandEnd(ctx.Session)
|
||||
indexes, err = sqlTable.GetIndexes(ctx)
|
||||
return err
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var newTableStats []*stats.Statistic
|
||||
for _, sqlIdx := range indexes {
|
||||
if sqlIdx.IsSpatial() || sqlIdx.IsFullText() || sqlIdx.IsGenerated() || sqlIdx.IsVector() {
|
||||
continue
|
||||
}
|
||||
var idx durable.Index
|
||||
var err error
|
||||
var prollyMap prolly.Map
|
||||
func() {
|
||||
sql.SessionCommandBegin(ctx.Session)
|
||||
defer sql.SessionCommandEnd(ctx.Session)
|
||||
if strings.EqualFold(sqlIdx.ID(), "PRIMARY") {
|
||||
idx, err = dTab.GetRowData(ctx)
|
||||
} else {
|
||||
idx, err = dTab.GetIndexRowData(ctx, sqlIdx.ID())
|
||||
}
|
||||
if err == nil {
|
||||
prollyMap, err = durable.ProllyMapFromIndex(idx)
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
sc.descError("GetRowData", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var template stats.Statistic
|
||||
if err := sc.sq.DoSync(ctx, func() error {
|
||||
sql.SessionCommandBegin(ctx.Session)
|
||||
defer sql.SessionCommandEnd(ctx.Session)
|
||||
_, template, err = sc.getTemplate(ctx, sqlTable, sqlIdx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stats collection failed to generate a statistic template: %s.%s.%s:%T; %s", sqlDb.RevisionQualifiedName(), tableName, sqlIdx, sqlIdx, err.Error())
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
} else if template.Fds.Empty() {
|
||||
return fmt.Errorf("failed to creat template for %s/%s/%s/%s", sqlDb.Revision(), sqlDb.AliasedName(), tableName, sqlIdx.ID())
|
||||
}
|
||||
|
||||
template.Qual.Database = sqlDb.AliasedName()
|
||||
|
||||
idxLen := len(sqlIdx.Expressions())
|
||||
|
||||
var levelNodes []tree.Node
|
||||
if err = sc.sq.DoSync(ctx, func() error {
|
||||
sql.SessionCommandBegin(ctx.Session)
|
||||
defer sql.SessionCommandEnd(ctx.Session)
|
||||
levelNodes, err = tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
|
||||
if err != nil {
|
||||
sc.descError("get level", err)
|
||||
}
|
||||
return err
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
var buckets []*stats.Bucket
|
||||
var firstBound sql.Row
|
||||
if len(levelNodes) > 0 {
|
||||
var writes int
|
||||
buckets, firstBound, writes, err = sc.collectIndexNodes(ctx, prollyMap, idxLen, levelNodes)
|
||||
if err != nil {
|
||||
sc.descError("", err)
|
||||
continue
|
||||
}
|
||||
newStats.BucketWrites += writes
|
||||
}
|
||||
|
||||
newTableStats = append(newTableStats, sc.finalizeHistogram(template, buckets, firstBound))
|
||||
|
||||
if gcKv != nil {
|
||||
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen))
|
||||
if !gcKv.GcMark(sc.kv, levelNodes, buckets, idxLen, keyBuilder) {
|
||||
return fmt.Errorf("GC interrupted updated")
|
||||
}
|
||||
if err := func() error {
|
||||
sql.SessionCommandBegin(ctx.Session)
|
||||
defer sql.SessionCommandEnd(ctx.Session)
|
||||
schHash, _, err := sqlTable.IndexCacheKey(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
|
||||
if t, ok := sc.GetTemplate(key); ok {
|
||||
gcKv.PutTemplate(key, t)
|
||||
}
|
||||
return nil
|
||||
}(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
newStats.stats[tableKey] = newTableStats
|
||||
newStats.hashes[tableKey] = tableHash
|
||||
newStats.TablesProcessed++
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetLatestTable will get the WORKING root table for the current database/branch
|
||||
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (*sqle.DoltTable, *doltdb.Table, error) {
|
||||
var db sqle.Database
|
||||
switch d := sqlDb.(type) {
|
||||
case sqle.Database:
|
||||
db = d
|
||||
case sqle.ReadReplicaDatabase:
|
||||
db = d.Database
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
|
||||
}
|
||||
sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
|
||||
}
|
||||
|
||||
var dTab *doltdb.Table
|
||||
var sqleTable *sqle.DoltTable
|
||||
switch t := sqlTable.(type) {
|
||||
case *sqle.AlterableDoltTable:
|
||||
sqleTable = t.DoltTable
|
||||
dTab, err = t.DoltTable.DoltTable(ctx)
|
||||
case *sqle.WritableDoltTable:
|
||||
sqleTable = t.DoltTable
|
||||
dTab, err = t.DoltTable.DoltTable(ctx)
|
||||
case *sqle.DoltTable:
|
||||
sqleTable = t
|
||||
dTab, err = t.DoltTable(ctx)
|
||||
default:
|
||||
err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return sqleTable, dTab, nil
|
||||
}
|
||||
|
||||
type templateCacheKey struct {
|
||||
h hash.Hash
|
||||
idxName string
|
||||
}
|
||||
|
||||
func (k templateCacheKey) String() string {
|
||||
return k.idxName + "/" + k.h.String()[:5]
|
||||
}
|
||||
|
||||
func (sc *StatsController) getTemplate(ctx *sql.Context, sqlTable *sqle.DoltTable, sqlIdx sql.Index) (templateCacheKey, stats.Statistic, error) {
|
||||
schHash, _, err := sqlTable.IndexCacheKey(ctx)
|
||||
if err != nil {
|
||||
return templateCacheKey{}, stats.Statistic{}, err
|
||||
}
|
||||
key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
|
||||
if template, ok := sc.GetTemplate(key); ok {
|
||||
return key, template, nil
|
||||
}
|
||||
fds, colset, err := stats.IndexFds(strings.ToLower(sqlTable.Name()), sqlTable.Schema(), sqlIdx)
|
||||
if err != nil {
|
||||
return templateCacheKey{}, stats.Statistic{}, err
|
||||
}
|
||||
|
||||
var class sql.IndexClass
|
||||
switch {
|
||||
case sqlIdx.IsSpatial():
|
||||
class = sql.IndexClassSpatial
|
||||
case sqlIdx.IsFullText():
|
||||
class = sql.IndexClassFulltext
|
||||
default:
|
||||
class = sql.IndexClassDefault
|
||||
}
|
||||
|
||||
var types []sql.Type
|
||||
for _, cet := range sqlIdx.ColumnExpressionTypes() {
|
||||
types = append(types, cet.Type)
|
||||
}
|
||||
|
||||
// xxx: the lower here is load bearing, index comparison
|
||||
// expects the expressions to be stripped of table name.
|
||||
tablePrefix := strings.ToLower(sqlTable.Name()) + "."
|
||||
cols := make([]string, len(sqlIdx.Expressions()))
|
||||
for i, c := range sqlIdx.Expressions() {
|
||||
cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
|
||||
}
|
||||
|
||||
template := stats.Statistic{
|
||||
Qual: sql.NewStatQualifier("", "", sqlTable.Name(), sqlIdx.ID()),
|
||||
Cols: cols,
|
||||
Typs: types,
|
||||
IdxClass: uint8(class),
|
||||
Fds: fds,
|
||||
Colset: colset,
|
||||
}
|
||||
|
||||
// We put template twice, once for schema changes with no data
|
||||
// changes (here), and once when we put chunks to avoid GC dropping
|
||||
// templates before the finalize job.
|
||||
sc.PutTemplate(key, template)
|
||||
|
||||
return key, template, nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -16,6 +16,7 @@ package sqle
|
||||
|
||||
import (
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/types"
|
||||
@@ -219,18 +220,18 @@ var DoltSystemVariables = []sql.SystemVariable{
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsAutoRefreshEnabled,
|
||||
Name: dsess.DoltStatsEnabled,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
|
||||
Default: int8(0),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsBootstrapEnabled,
|
||||
Name: dsess.DoltStatsPaused,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
|
||||
Default: int8(0),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsPaused),
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsMemoryOnly,
|
||||
@@ -240,18 +241,25 @@ var DoltSystemVariables = []sql.SystemVariable{
|
||||
Default: int8(0),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsAutoRefreshThreshold,
|
||||
Name: dsess.DoltStatsJobInterval,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
|
||||
Default: float64(.5),
|
||||
Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
|
||||
Default: int64(30 * time.Millisecond / time.Millisecond),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsAutoRefreshInterval,
|
||||
Name: dsess.DoltStatsGCInterval,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
|
||||
Default: 600,
|
||||
Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
|
||||
Default: int64(time.Hour / time.Millisecond),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsGCEnabled,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsGCEnabled),
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsBranches,
|
||||
@@ -446,18 +454,39 @@ func AddDoltSystemVariables() {
|
||||
Default: int8(0),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsAutoRefreshEnabled,
|
||||
Name: dsess.DoltStatsEnabled,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
|
||||
Default: int8(0),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsBootstrapEnabled,
|
||||
Name: dsess.DoltStatsPaused,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
|
||||
Default: int8(0),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsPaused),
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsGCInterval,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
|
||||
Default: int64(time.Hour / time.Millisecond),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsGCEnabled,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsGCEnabled),
|
||||
Default: int8(1),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsJobInterval,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
|
||||
Default: int64(30 * time.Millisecond / time.Millisecond),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsMemoryOnly,
|
||||
@@ -466,20 +495,6 @@ func AddDoltSystemVariables() {
|
||||
Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
|
||||
Default: int8(0),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsAutoRefreshThreshold,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
|
||||
Default: float64(.5),
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsAutoRefreshInterval,
|
||||
Dynamic: true,
|
||||
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
|
||||
Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
|
||||
Default: 120,
|
||||
},
|
||||
&sql.MysqlSystemVariable{
|
||||
Name: dsess.DoltStatsBranches,
|
||||
Dynamic: true,
|
||||
|
||||
@@ -129,12 +129,12 @@ func (t *DoltTable) LookupForExpressions(ctx *sql.Context, exprs ...sql.Expressi
|
||||
return sql.IndexLookup{}, nil, nil, false, nil
|
||||
}
|
||||
|
||||
dbState, ok, err := sess.LookupDbState(ctx, t.db.Name())
|
||||
dbState, ok, err := sess.LookupDbState(ctx, t.db.AliasedName())
|
||||
if err != nil {
|
||||
return sql.IndexLookup{}, nil, nil, false, nil
|
||||
}
|
||||
if !ok {
|
||||
return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.Name())
|
||||
return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.AliasedName())
|
||||
}
|
||||
|
||||
var lookupCols []expression.LookupColumn
|
||||
|
||||
@@ -517,7 +517,10 @@ func SqlRowsFromDurableIndex(idx durable.Index, sch schema.Schema) ([]sql.Row, e
|
||||
ctx := context.Background()
|
||||
var sqlRows []sql.Row
|
||||
if types.Format_Default == types.Format_DOLT {
|
||||
rowData := durable.ProllyMapFromIndex(idx)
|
||||
rowData, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
kd, vd := rowData.Descriptors()
|
||||
iter, err := rowData.IterAll(ctx)
|
||||
if err != nil {
|
||||
|
||||
@@ -141,6 +141,10 @@ func (db *UserSpaceDatabase) RequestedName() string {
|
||||
return db.Name()
|
||||
}
|
||||
|
||||
func (db *UserSpaceDatabase) AliasedName() string {
|
||||
return db.Name()
|
||||
}
|
||||
|
||||
func (db *UserSpaceDatabase) GetSchema(ctx *sql.Context, schemaName string) (sql.DatabaseSchema, bool, error) {
|
||||
panic(fmt.Sprintf("GetSchema is not implemented for database %T", db))
|
||||
}
|
||||
|
||||
@@ -36,7 +36,10 @@ func getPrimaryProllyWriter(ctx context.Context, t *doltdb.Table, schState *dses
|
||||
return prollyIndexWriter{}, err
|
||||
}
|
||||
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return prollyIndexWriter{}, err
|
||||
}
|
||||
|
||||
keyDesc, valDesc := m.Descriptors()
|
||||
|
||||
@@ -55,7 +58,10 @@ func getPrimaryKeylessProllyWriter(ctx context.Context, t *doltdb.Table, schStat
|
||||
return prollyKeylessWriter{}, err
|
||||
}
|
||||
|
||||
m := durable.ProllyMapFromIndex(idx)
|
||||
m, err := durable.ProllyMapFromIndex(idx)
|
||||
if err != nil {
|
||||
return prollyKeylessWriter{}, err
|
||||
}
|
||||
|
||||
keyDesc, valDesc := m.Descriptors()
|
||||
|
||||
|
||||
@@ -116,7 +116,10 @@ func getSecondaryKeylessProllyWriters(ctx context.Context, t *doltdb.Table, schS
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := durable.ProllyMapFromIndex(idxRows)
|
||||
m, err := durable.ProllyMapFromIndex(idxRows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keyDesc, _ := m.Descriptors()
|
||||
|
||||
|
||||
@@ -102,7 +102,10 @@ func BuildProllyIndexExternal(ctx *sql.Context, vrw types.ValueReadWriter, ns tr
|
||||
defer it.Close()
|
||||
|
||||
empty, err := durable.NewEmptyIndexFromTableSchema(ctx, vrw, ns, idx, sch)
|
||||
secondary := durable.ProllyMapFromIndex(empty)
|
||||
secondary, err := durable.ProllyMapFromIndex(empty)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tupIter := &tupleIterWithCb{iter: it, prefixDesc: prefixDesc, uniqCb: uniqCb}
|
||||
ret, err := prolly.MutateMapWithTupleIter(ctx, secondary, tupIter)
|
||||
|
||||
@@ -150,7 +150,11 @@ func BuildSecondaryIndex(ctx *sql.Context, tbl *doltdb.Table, idx schema.Index,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
primary := durable.ProllyMapFromIndex(m)
|
||||
primary, err := durable.ProllyMapFromIndex(m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, idx, primary)
|
||||
|
||||
default:
|
||||
@@ -218,7 +222,10 @@ func BuildUniqueProllyIndex(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
secondary := durable.ProllyMapFromIndex(empty)
|
||||
secondary, err := durable.ProllyMapFromIndex(empty)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
iter, err := primary.IterAll(ctx)
|
||||
if err != nil {
|
||||
|
||||
+12
-1
@@ -34,12 +34,20 @@ func (b *Buff[T]) Len() int {
|
||||
return b.len
|
||||
}
|
||||
|
||||
func (b *Buff[T]) Cap() int {
|
||||
return cap(b.arr)
|
||||
}
|
||||
|
||||
func (b *Buff[T]) At(i int) T {
|
||||
return *b.at(i)
|
||||
}
|
||||
|
||||
func (b *Buff[T]) at(i int) *T {
|
||||
if i >= b.Len() {
|
||||
panic("At on Buff too small")
|
||||
}
|
||||
j := (b.front + i) % len(b.arr)
|
||||
return b.arr[j]
|
||||
return &b.arr[j]
|
||||
}
|
||||
|
||||
func (b *Buff[T]) Front() T {
|
||||
@@ -50,6 +58,9 @@ func (b *Buff[T]) Pop() {
|
||||
if b.Len() == 0 {
|
||||
panic("Pop empty Buff")
|
||||
}
|
||||
// Don't leak entries...
|
||||
var empty T
|
||||
*b.at(0) = empty
|
||||
b.front = (b.front + 1) % len(b.arr)
|
||||
b.len -= 1
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package valctx
|
||||
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
var enabled bool
|
||||
|
||||
// Globally enables context validation for the process. If this is not
|
||||
// called, then the other functions in this package are noops.
|
||||
func EnableContextValidation() {
|
||||
enabled = true
|
||||
}
|
||||
|
||||
type ctxKey int
|
||||
|
||||
var validationKey ctxKey
|
||||
|
||||
func WithContextValidation(ctx context.Context) context.Context {
|
||||
if !enabled {
|
||||
return ctx
|
||||
}
|
||||
return context.WithValue(ctx, validationKey, new(Validation))
|
||||
}
|
||||
|
||||
type Validation func()
|
||||
|
||||
func SetContextValidation(ctx context.Context, validation Validation) {
|
||||
if !enabled {
|
||||
return
|
||||
}
|
||||
*ctx.Value(validationKey).(*Validation) = validation
|
||||
}
|
||||
|
||||
func ValidateContext(ctx context.Context) {
|
||||
if !enabled {
|
||||
return
|
||||
}
|
||||
(*ctx.Value(validationKey).(*Validation))()
|
||||
}
|
||||
@@ -21,9 +21,6 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -149,10 +146,6 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.collectStats(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fmt.Println("Running test", t.test.GetName())
|
||||
|
||||
rs, err := t.run(ctx)
|
||||
@@ -162,76 +155,3 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) {
|
||||
|
||||
return rs, nil
|
||||
}
|
||||
|
||||
func (t *sysbenchTesterImpl) collectStats(ctx context.Context) error {
|
||||
if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") {
|
||||
db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/test", t.serverConfig.GetHost(), t.serverConfig.GetPort()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return collectStats(ctx, db)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func collectStats(ctx context.Context, db *sqlx.DB) error {
|
||||
c, err := db.Connx(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
{
|
||||
// configuration, restart, and check needs to be in the same session
|
||||
tx, err := c.BeginTxx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_enabled = 1;"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_interval = 0;"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := tx.Exec("call dolt_stats_restart();"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rows := map[string]interface{}{"cnt": 0}
|
||||
tick := time.NewTicker(5 * time.Second)
|
||||
for {
|
||||
if rows["cnt"] != 0 {
|
||||
fmt.Printf("collected %d histogram buckets\n", rows["cnt"])
|
||||
break
|
||||
}
|
||||
select {
|
||||
case <-tick.C:
|
||||
res, err := tx.Queryx("select count(*) as cnt from dolt_statistics;")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !res.Next() {
|
||||
return fmt.Errorf("failed to set statistics")
|
||||
}
|
||||
if err := res.MapScan(rows); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := res.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := c.QueryContext(ctx, "call dolt_stats_stop();"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -20,9 +20,6 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
type tpccTesterImpl struct {
|
||||
@@ -54,17 +51,6 @@ func (t *tpccTesterImpl) outputToResult(output []byte) (*Result, error) {
|
||||
return OutputToResult(output, t.serverConfig.GetServerType(), t.serverConfig.GetVersion(), t.test.GetName(), t.test.GetId(), t.suiteId, t.config.GetRuntimeOs(), t.config.GetRuntimeGoArch(), t.serverParams, t.test.GetParamsToSlice(), nil, false)
|
||||
}
|
||||
|
||||
func (t *tpccTesterImpl) collectStats(ctx context.Context) error {
|
||||
if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") {
|
||||
db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/sbt", t.serverConfig.GetHost(), t.serverConfig.GetPort()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return collectStats(ctx, db)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *tpccTesterImpl) prepare(ctx context.Context) error {
|
||||
args := t.test.GetPrepareArgs(t.serverConfig)
|
||||
cmd := exec.CommandContext(ctx, t.tpccCommand, args...)
|
||||
@@ -119,10 +105,6 @@ func (t *tpccTesterImpl) Test(ctx context.Context) (*Result, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := t.collectStats(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fmt.Println("Running test", t.test.GetName())
|
||||
|
||||
rs, err := t.run(ctx)
|
||||
|
||||
@@ -132,7 +132,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer](
|
||||
prev := newKey
|
||||
newKey, newValue = edits.NextMutation(ctx)
|
||||
if newKey != nil {
|
||||
assertTrue(order.Compare(ctx, K(newKey), K(prev)) > 0, "expected sorted edits")
|
||||
assertTrue(order.Compare(ctx, K(newKey), K(prev)) > 0, "expected sorted edits: %v, %v", prev, newKey)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -629,8 +629,8 @@ func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error)
|
||||
return ns.Read(ctx, ref)
|
||||
}
|
||||
|
||||
func assertTrue(b bool, msg string) {
|
||||
func assertTrue(b bool, msg string, args ...any) {
|
||||
if !b {
|
||||
panic("assertion failed: " + msg)
|
||||
panic(fmt.Sprintf("assertion failed: "+msg, args...))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,6 +141,11 @@ func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m Static
|
||||
// GetHistogramLevel returns the highest internal level of the tree that has
|
||||
// more than |low| addresses.
|
||||
func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) {
|
||||
if cnt, err := m.Count(); err != nil {
|
||||
return nil, err
|
||||
} else if cnt == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
currentLevel := []Node{m.Root}
|
||||
level := m.Root.Level()
|
||||
for len(currentLevel) < low && level > 0 {
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
package val
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors"
|
||||
@@ -77,7 +78,7 @@ func NewTupleBuilder(desc TupleDesc) *TupleBuilder {
|
||||
func (tb *TupleBuilder) Build(pool pool.BuffPool) (tup Tuple) {
|
||||
for i, typ := range tb.Desc.Types {
|
||||
if !typ.Nullable && tb.fields[i] == nil {
|
||||
panic("cannot write NULL to non-NULL field")
|
||||
panic("cannot write NULL to non-NULL field: " + strconv.Itoa(i))
|
||||
}
|
||||
}
|
||||
return tb.BuildPermissive(pool)
|
||||
|
||||
@@ -636,11 +636,11 @@ func (td TupleDesc) formatValue(ctx context.Context, enc Encoding, i int, value
|
||||
case Hash128Enc:
|
||||
return hex.EncodeToString(value)
|
||||
case BytesAddrEnc:
|
||||
return hex.EncodeToString(value)
|
||||
return hash.New(value).String()
|
||||
case StringAddrEnc:
|
||||
return hex.EncodeToString(value)
|
||||
return hash.New(value).String()
|
||||
case CommitAddrEnc:
|
||||
return hex.EncodeToString(value)
|
||||
return hash.New(value).String()
|
||||
case CellEnc:
|
||||
return hex.EncodeToString(value)
|
||||
case ExtendedEnc:
|
||||
|
||||
+322
-510
@@ -22,12 +22,15 @@ SQL
|
||||
|
||||
cd $TMPDIRS/repo2
|
||||
dolt init
|
||||
dolt sql -q "SET @@PERSIST.dolt_stats_job_interval = 100"
|
||||
|
||||
dolt sql <<SQL
|
||||
create table xy (x int primary key, y int, key (y,x));
|
||||
create table ab (a int primary key, b int, key (b,a));
|
||||
SQL
|
||||
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_job_interval = 1;"
|
||||
|
||||
cd $TMPDIRS
|
||||
}
|
||||
|
||||
@@ -38,107 +41,344 @@ teardown() {
|
||||
cd $BATS_TMPDIR
|
||||
}
|
||||
|
||||
@test "stats: empty initial stats" {
|
||||
@test "stats: dolt_stats_once" {
|
||||
# running once populates stats and returns valid json response
|
||||
cd repo2
|
||||
|
||||
# disable bootstrap, can only make stats with ANALYZE or background thread
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
start_sql_server
|
||||
sleep 1
|
||||
stop_sql_server
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
run dolt sql -r csv -q "call dolt_stats_once()"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = .5"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
# auto refresh initialize at server startup
|
||||
start_sql_server
|
||||
|
||||
# need to trigger at least one refresh cycle
|
||||
sleep 1
|
||||
|
||||
# only statistics for non-empty tables are collected
|
||||
run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[0]}" = "database_name,table_name,index_name" ]
|
||||
[ "${lines[1]}" = "repo2,xy,primary" ]
|
||||
[ "${lines[2]}" = "repo2,xy,y" ]
|
||||
|
||||
# appending new chunks picked up
|
||||
dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;"
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "8" ]
|
||||
|
||||
# updates picked up
|
||||
dolt sql -q "update xy set y = 2 where x between 100 and 800"
|
||||
|
||||
sleep 1
|
||||
|
||||
dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "8" ]
|
||||
[[ "$output" =~ '{""dbCnt"":1,""bucketWrites"":2,""tablesProcessed"":2,""tablesSkipped"":0}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: bootstrap on server startup" {
|
||||
|
||||
@test "stats: second once does no work" {
|
||||
# running once populates stats and returns valid json response
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
# disable higher precedence auto-update
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;"
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_once()"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${lines[3]}" =~ '{""dbCnt"":1,""bucketWrites"":0,""tablesProcessed"":0,""tablesSkipped"":2}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: once after reload does no incremental work" {
|
||||
# running once populates stats and returns valid json response
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
dolt sql -r csv -q "call dolt_stats_once();"
|
||||
run dolt sql -r csv -q "call dolt_stats_once();"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${lines[1]}" =~ '{""dbCnt"":1,""bucketWrites"":0,""tablesProcessed"":2,""tablesSkipped"":0}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_wait" {
|
||||
# wait stalls until stats are ready
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
run dolt sql -r csv <<EOF
|
||||
call dolt_stats_restart();
|
||||
call dolt_stats_wait();
|
||||
select count(*) from dolt_statistics
|
||||
EOF
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[5]}" = "2" ]
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_info" {
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_server_wait" {
|
||||
# wait stalls until stats are ready
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
start_sql_server
|
||||
stop_sql_server
|
||||
|
||||
dolt sql -r csv -q "call dolt_stats_wait()"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
}
|
||||
|
||||
@test "stats: auto-update on server startup" {
|
||||
@test "stats: dolt_stats_server_paused" {
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_paused = 1;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
|
||||
run dolt sql -r "call dolt_stats_wait()"
|
||||
[ "$status" -eq 1 ]
|
||||
run dolt sql -r "call dolt_stats_gc()"
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_purge" {
|
||||
# running once populates stats and returns valid json response
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_purge(); call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${lines[5]}" =~ '"{""dbCnt"":0,""active"":false,""storageBucketCnt"":0,""cachedBucketCnt"":0,""cachedBoundCnt"":0,""cachedTemplateCnt"":0,""statCnt"":0,""backing"":""repo2""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_purge server" {
|
||||
cd repo2
|
||||
|
||||
start_sql_server
|
||||
run dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
sleep 1
|
||||
stop_sql_server
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
dolt sql -q "call dolt_stats_wait()"
|
||||
dolt sql -q "call dolt_stats_stop()"
|
||||
dolt sql -q "call dolt_stats_purge()"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${lines[1]}" =~ '"{""dbCnt"":0,""active"":false,""storageBucketCnt"":0,""cachedBucketCnt"":0,""cachedBoundCnt"":0,""cachedTemplateCnt"":0,""statCnt"":0,""backing"":""repo2""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_gc fails in shell" {
|
||||
cd repo2
|
||||
dolt sql <<SQL
|
||||
insert into xy values (0,0), (1,1);
|
||||
call dolt_stats_once();
|
||||
insert into xy values (2,2), (3,3);
|
||||
call dolt_stats_once();
|
||||
SQL
|
||||
|
||||
run dolt sql -q "dolt_stats_gc()"
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":0,""active"":false,""storageBucketCnt"":4,""cachedBucketCnt"":0,""cachedBoundCnt"":0,""cachedTemplateCnt"":0,""statCnt"":0,""backing"":""repo2""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_gc server" {
|
||||
cd repo2
|
||||
|
||||
# only user-triggered GC's
|
||||
dolt sql -q "SET @@PERSIST.dolt_stats_gc_enabled = 0"
|
||||
|
||||
start_sql_server
|
||||
|
||||
dolt sql -r csv <<SQL
|
||||
insert into xy values (0,0), (1,1);
|
||||
create table toDelete(i int primary key);
|
||||
insert into toDelete values (5), (6);
|
||||
|
||||
-- invalidate previous xy buckets
|
||||
call dolt_stats_wait();
|
||||
call dolt_stats_info('--short');
|
||||
insert into xy values (2,2), (3,3);
|
||||
|
||||
call dolt_add('-A');
|
||||
call dolt_commit('-m', 'main branch');
|
||||
|
||||
-- mirror main
|
||||
call dolt_checkout('-b', 'feat1');
|
||||
call dolt_checkout('-b', 'feat2');
|
||||
|
||||
create database other;
|
||||
use other;
|
||||
create table ot (i int primary key);
|
||||
insert into ot values (0), (1), (2);
|
||||
|
||||
call dolt_stats_wait();
|
||||
call dolt_stats_info('--short');
|
||||
SQL
|
||||
|
||||
# starting point
|
||||
# dbs: repo2/[main, feat1, feat2], other/main
|
||||
# stats: repo2:[xy,ab,toDelete]*3, other:[ot]*1
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short');"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":6,""cachedBucketCnt"":6,""cachedBoundCnt"":6,""cachedTemplateCnt"":6,""statCnt"":10,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
# clear invalid xy
|
||||
dolt sql -q "call dolt_stats_gc()"
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":6,""statCnt"":10,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
# remove toDelete table from 2/3 branches and gc
|
||||
dolt sql -q "use repo2; call dolt_checkout('feat1'); drop table toDelete"
|
||||
dolt sql -q "use repo2; call dolt_checkout('main'); drop table toDelete"
|
||||
dolt sql -q "call dolt_stats_gc()"
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":4,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":6,""statCnt"":8,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
# remove branch stats and gc
|
||||
dolt sql -q "use repo2; call dolt_branch('-D', 'feat1', 'feat2')"
|
||||
dolt sql -q "call dolt_stats_wait()"
|
||||
dolt sql -q "call dolt_stats_gc()"
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":2,""active"":true,""storageBucketCnt"":3,""cachedBucketCnt"":3,""cachedBoundCnt"":3,""cachedTemplateCnt"":5,""statCnt"":3,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
# delete whole db and gc
|
||||
dolt sql -q "drop database other;"
|
||||
dolt sql -q "call dolt_stats_wait()"
|
||||
dolt sql -q "call dolt_stats_gc()"
|
||||
dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: delete database clean swap" {
|
||||
# only user-triggered GC's
|
||||
dolt sql -q "SET @@PERSIST.dolt_stats_gc_enabled = 0"
|
||||
|
||||
# don't start server in repo2, the shell->server access
|
||||
# breaks when you delete the primary database
|
||||
start_sql_server
|
||||
|
||||
dolt sql -r csv <<SQL
|
||||
use repo2;
|
||||
insert into xy values (0,0), (1,1);
|
||||
|
||||
create database other;
|
||||
use other;
|
||||
create table ot (i int primary key);
|
||||
insert into ot values (0), (1), (2);
|
||||
|
||||
call dolt_stats_wait();
|
||||
|
||||
use other;
|
||||
drop database repo2;
|
||||
drop database repo1;
|
||||
call dolt_stats_gc();
|
||||
SQL
|
||||
|
||||
# other still exists
|
||||
dolt sql -q "call dolt_stats_info('--short');"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short');"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":1,""cachedBucketCnt"":1,""cachedBoundCnt"":1,""cachedTemplateCnt"":1,""statCnt"":1,""backing"":""other""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: multiple stats dbs at start is OK" {
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0)"
|
||||
dolt sql -q "insert into ab values (0,0)"
|
||||
dolt sql -q "call dolt_stats_once()"
|
||||
|
||||
cd ../repo1
|
||||
dolt sql -q "insert into ab values (0,0)"
|
||||
dolt sql -q "call dolt_stats_once()"
|
||||
|
||||
cd ..
|
||||
start_sql_server
|
||||
|
||||
dolt sql -q "call dolt_stats_wait();"
|
||||
dolt sql -q "call dolt_stats_info('--short');"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short');"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":2,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":3,""backing"":""repo1""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: dolt_stats_stop_restart" {
|
||||
cd repo2
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
start_sql_server
|
||||
|
||||
dolt sql -r csv -q "call dolt_stats_wait()"
|
||||
|
||||
# server running stats by default
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
# stop turns stats off
|
||||
dolt sql -q "call dolt_stats_stop()"
|
||||
dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
|
||||
# don't pick up changes when stopped
|
||||
dolt sql -q "insert into xy values (2,2), (4,4)"
|
||||
|
||||
run dolt sql -r csv -q "call dolt_stats_wait()"
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":2,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
|
||||
|
||||
dolt sql -r csv -q "call dolt_stats_restart()"
|
||||
dolt sql -r csv -q "call dolt_stats_wait()"
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":4,""cachedBucketCnt"":4,""cachedBoundCnt"":4,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""repo2""}"' ]] || false
|
||||
}
|
||||
|
||||
@test "stats: memory only doesn't write to disk" {
|
||||
cd repo2
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_memory_only = 1"
|
||||
|
||||
start_sql_server
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
dolt sql -q "call dolt_stats_once()"
|
||||
|
||||
dolt sql -q "call dolt_stats_info('--short')"
|
||||
run dolt sql -r csv -q "call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":true,""storageBucketCnt"":0,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""memory""}"' ]] || false
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
stop_sql_server
|
||||
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); call dolt_stats_info('--short')"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '"{""dbCnt"":1,""active"":false,""storageBucketCnt"":0,""cachedBucketCnt"":2,""cachedBoundCnt"":2,""cachedTemplateCnt"":4,""statCnt"":2,""backing"":""memory""}"' ]] || false
|
||||
}
|
||||
|
||||
|
||||
@test "stats: only bootstrap server startup" {
|
||||
@test "stats: waiters error for closed stats queue" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
dolt gc
|
||||
run dolt sql -q "call dolt_stats_gc()"
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
run dolt sql -q "call dolt_stats_wait()"
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
run dolt sql -q "call dolt_stats_flush()"
|
||||
[ "$status" -eq 1 ]
|
||||
}
|
||||
|
||||
@test "stats: encode/decode loop is delimiter safe" {
|
||||
@@ -147,12 +387,11 @@ teardown() {
|
||||
dolt sql <<EOF
|
||||
create table uv (u varbinary(255) primary key);
|
||||
insert into uv values ('hello, world');
|
||||
analyze table uv;
|
||||
EOF
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "1" ]
|
||||
[ "${lines[3]}" = "1" ]
|
||||
}
|
||||
|
||||
@test "stats: correct stats directory location, issue#8324" {
|
||||
@@ -167,6 +406,9 @@ EOF
|
||||
|
||||
run stat .dolt/repo2
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
run stat .dolt/stats/.dolt
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "stats: restart in shell doesn't drop db, issue#8345" {
|
||||
@@ -174,492 +416,62 @@ EOF
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1), (2,2), (3,3), (4,4)"
|
||||
dolt sql -q "insert into ab values (0,0), (1,1), (2,2), (3,3), (4,4)"
|
||||
dolt sql -q "ANALYZE table xy, ab"
|
||||
run dolt sql -r csv <<EOF
|
||||
call dolt_stats_once();
|
||||
select count(*) from dolt_statistics;
|
||||
set @@GLOBAL.dolt_stats_auto_refresh_interval = 2;
|
||||
call dolt_stats_restart();
|
||||
select count(*) from dolt_statistics;
|
||||
select sleep(3);
|
||||
call dolt_stats_wait();
|
||||
select count(*) from dolt_statistics;
|
||||
EOF
|
||||
[ "${lines[1]}" = "4" ]
|
||||
[ "${lines[5]}" = "4" ]
|
||||
[ "${lines[9]}" = "4" ]
|
||||
[ "${lines[3]}" = "4" ]
|
||||
[ "${lines[7]}" = "4" ]
|
||||
[ "${lines[11]}" = "4" ]
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "stats: stats roundtrip restart" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
# make sure no stats
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
# add stats while server is running
|
||||
start_sql_server
|
||||
dolt sql -q "call dolt_stats_restart()"
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
stop_sql_server
|
||||
|
||||
# make sure restarted server sees same stats
|
||||
start_sql_server
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
stop_sql_server
|
||||
}
|
||||
|
||||
@test "stats: deletes refresh" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "set @@persist.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "set @@persist.dolt_stats_auto_refresh_threshold = .5"
|
||||
dolt sql -q "set @@persist.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "8" ]
|
||||
|
||||
# delete >50% of rows
|
||||
dolt sql -q "delete from xy where x > 600"
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "4" ]
|
||||
}
|
||||
|
||||
@test "stats: dolt_state_purge cli" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
#start_sql_server
|
||||
|
||||
#sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
dolt sql -q "call dolt_stats_purge()"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
}
|
||||
|
||||
@test "stats: dolt_state_purge server" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
sleep 1
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
dolt sql -q "call dolt_stats_purge()"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
stop_sql_server
|
||||
}
|
||||
|
||||
@test "stats: dolt_state_prune cli" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
#start_sql_server
|
||||
|
||||
#sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
dolt sql -q "call dolt_stats_prune()"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
}
|
||||
|
||||
@test "stats: dolt_state_prune server" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 0;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
sleep 1
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
dolt sql -q "call dolt_stats_prune()"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
stop_sql_server
|
||||
}
|
||||
|
||||
@test "stats: add/delete table" {
|
||||
cd repo1
|
||||
|
||||
dolt sql -q "insert into ab values (0,0), (1,0), (2,0)"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
# add table
|
||||
dolt sql -q "create table xy (x int primary key, y int)"
|
||||
# schema changes don't impact the table hash
|
||||
dolt sql -q "insert into xy values (0,0)"
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "1" ]
|
||||
|
||||
dolt sql -q "truncate table xy"
|
||||
|
||||
sleep 1
|
||||
|
||||
dolt sql -q "select * from xy"
|
||||
|
||||
dolt sql -q "select * from dolt_statistics where table_name = 'xy'"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
dolt sql -q "drop table xy"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics where table_name = 'xy'"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
}
|
||||
|
||||
@test "stats: add/delete index" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,0), (2,0)"
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
|
||||
# delete secondary
|
||||
dolt sql -q "alter table xy drop index y"
|
||||
# schema changes don't impact the table hash
|
||||
dolt sql -q "insert into xy values (3,0)"
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "1" ]
|
||||
|
||||
dolt sql -q "alter table xy add index yx (y,x)"
|
||||
# row change to impact table hash
|
||||
dolt sql -q "insert into xy values (4,0)"
|
||||
|
||||
sleep 1
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
}
|
||||
|
||||
@test "stats: most common values" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "alter table xy add index y2 (y)"
|
||||
dolt sql -q "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)"
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
run dolt sql -r csv -q "select mcv1, mcv2 from dolt_statistics where index_name = 'y2'"
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); select mcv1, mcv2 from dolt_statistics where index_name = 'y2'"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "1,0" ]
|
||||
}
|
||||
|
||||
@test "stats: multi db" {
|
||||
cd repo1
|
||||
|
||||
dolt sql -q "insert into ab values (0,0), (1,1)"
|
||||
|
||||
cd ../repo2
|
||||
|
||||
dolt sql -q "insert into ab values (0,0), (1,1)"
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
cd ..
|
||||
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = 0.5"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
start_sql_server
|
||||
sleep 1
|
||||
|
||||
dolt sql -q "use repo1"
|
||||
run dolt sql -r csv -q "select database_name, table_name, index_name from dolt_statistics order by index_name"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[0]}" = "database_name,table_name,index_name" ]
|
||||
[ "${lines[1]}" = "repo1,ab,b" ]
|
||||
[ "${lines[2]}" = "repo1,ab,primary" ]
|
||||
|
||||
run dolt sql -r csv -q "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[0]}" = "database_name,table_name,index_name" ]
|
||||
[ "${lines[1]}" = "repo2,ab,b" ]
|
||||
[ "${lines[2]}" = "repo2,ab,primary" ]
|
||||
[ "${lines[3]}" = "repo2,xy,primary" ]
|
||||
[ "${lines[4]}" = "repo2,xy,y" ]
|
||||
}
|
||||
|
||||
@test "stats: add/delete database" {
|
||||
cd repo1
|
||||
|
||||
# setting variables doesn't hang or error
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = .5"
|
||||
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
start_sql_server
|
||||
|
||||
dolt sql -q "insert into ab values (0,0), (1,0), (2,0)"
|
||||
dolt sql <<SQL
|
||||
create database repo2;
|
||||
create table repo2.xy (x int primary key, y int, key(y,x));
|
||||
insert into repo2.xy values (0,0), (1,0), (2,0);
|
||||
SQL
|
||||
|
||||
sleep 1
|
||||
|
||||
# specify database_name filter even though can only see active db stats
|
||||
run dolt sql -r csv <<SQL
|
||||
use repo2;
|
||||
select count(*) from dolt_statistics where database_name = 'repo2';
|
||||
SQL
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[2]}" = "2" ]
|
||||
|
||||
# drop repo2
|
||||
dolt sql -q "drop database repo2"
|
||||
|
||||
sleep 1
|
||||
|
||||
# we can't access repo2 stats, but still try
|
||||
run dolt sql -r csv <<SQL
|
||||
select count(*) from dolt_statistics where database_name = 'repo2';
|
||||
SQL
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
dolt sql <<SQL
|
||||
create database repo2;
|
||||
create table repo2.xy (x int primary key, y int, key(y,x));
|
||||
SQL
|
||||
|
||||
sleep 1
|
||||
|
||||
# no rows yet
|
||||
run dolt sql -r csv <<SQL
|
||||
use repo2;
|
||||
select count(*) from dolt_statistics where database_name = 'repo2';
|
||||
SQL
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[2]}" = "0" ]
|
||||
|
||||
dolt sql <<SQL
|
||||
use repo2;
|
||||
insert into xy values (0,0);
|
||||
analyze table xy;
|
||||
SQL
|
||||
|
||||
sleep 1
|
||||
|
||||
# insert initializes stats
|
||||
run dolt sql -r csv <<SQL
|
||||
use repo2;
|
||||
select count(*) from dolt_statistics where database_name = 'repo2';
|
||||
SQL
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[2]}" = "2" ]
|
||||
}
|
||||
|
||||
# bats test_tags=no_lambda
|
||||
@test "stats: boostrap abort over 1mm rows" {
|
||||
cat <<EOF > data.py
|
||||
import random
|
||||
import os
|
||||
|
||||
rows = 2*1000*1000+1
|
||||
|
||||
def main():
|
||||
f = open("data.csv","w+")
|
||||
f.write("id,hostname\n")
|
||||
|
||||
for i in range(rows):
|
||||
hostname = random.getrandbits(100)
|
||||
f.write(f"{i},{hostname}\n")
|
||||
if i % (500*1000) == 0:
|
||||
print("row :", i)
|
||||
f.flush()
|
||||
|
||||
f.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
EOF
|
||||
|
||||
mkdir repo3
|
||||
cd repo3
|
||||
python3 ../data.py
|
||||
|
||||
dolt init
|
||||
dolt sql -q "create table f (id int primary key, hostname int)"
|
||||
dolt table import -u --continue f data.csv
|
||||
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false
|
||||
[ "${lines[2]}" = "0" ]
|
||||
[ "${lines[3]}" = "1,0" ]
|
||||
}
|
||||
|
||||
@test "stats: stats delete index schema change" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
# stats OK after analyze
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
[ "${lines[3]}" = "2" ]
|
||||
|
||||
dolt sql -q "alter table xy drop index y"
|
||||
|
||||
# load after schema change should purge
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "1" ]
|
||||
[ "${lines[3]}" = "1" ]
|
||||
}
|
||||
|
||||
@test "stats: stats recreate table without index" {
|
||||
cd repo2
|
||||
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
|
||||
dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
|
||||
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "2" ]
|
||||
dolt sql -q "call dolt_stats_once()"
|
||||
|
||||
dolt sql -q "drop table xy"
|
||||
dolt sql -q "create table xy (x int primary key, y int)"
|
||||
dolt sql -q "insert into xy values (0,0), (1,1)"
|
||||
|
||||
# make sure no stats
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
run dolt sql -r csv -q "call dolt_stats_once(); select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "0" ]
|
||||
|
||||
dolt sql -q "analyze table xy"
|
||||
|
||||
run dolt sql -r csv -q "select count(*) from dolt_statistics"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = "1" ]
|
||||
|
||||
stop_sql_server
|
||||
[ "${lines[3]}" = "1" ]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user