mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-22 11:29:06 -05:00
Merge pull request #5899 from dolthub/aaron/dolt_gc-unavailable-on-standby-replica
go: sqle: dolt_gc.go: Disable CALL dolt_gc() on a standby replica, where it is not currently safe. Shallow GC is still available.
This commit is contained in:
@@ -95,8 +95,6 @@ type procedurestore interface {
|
||||
}
|
||||
|
||||
const (
|
||||
DoltClusterRoleVariable = "dolt_cluster_role"
|
||||
DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch"
|
||||
// Since we fetch the keys from the other replicas we’re going to use a fixed string here.
|
||||
DoltClusterRemoteApiAudience = "dolt-cluster-remote-api.dolthub.com"
|
||||
)
|
||||
@@ -285,17 +283,17 @@ func (c *Controller) refreshSystemVars() {
|
||||
role, epoch := string(c.role), c.epoch
|
||||
vars := []sql.SystemVariable{
|
||||
{
|
||||
Name: DoltClusterRoleVariable,
|
||||
Name: dsess.DoltClusterRoleVariable,
|
||||
Dynamic: false,
|
||||
Scope: sql.SystemVariableScope_Persist,
|
||||
Type: gmstypes.NewSystemStringType(DoltClusterRoleVariable),
|
||||
Type: gmstypes.NewSystemStringType(dsess.DoltClusterRoleVariable),
|
||||
Default: role,
|
||||
},
|
||||
{
|
||||
Name: DoltClusterRoleEpochVariable,
|
||||
Name: dsess.DoltClusterRoleEpochVariable,
|
||||
Dynamic: false,
|
||||
Scope: sql.SystemVariableScope_Persist,
|
||||
Type: gmstypes.NewSystemIntType(DoltClusterRoleEpochVariable, 0, 9223372036854775807, false),
|
||||
Type: gmstypes.NewSystemIntType(dsess.DoltClusterRoleEpochVariable, 0, 9223372036854775807, false),
|
||||
Default: epoch,
|
||||
},
|
||||
}
|
||||
@@ -304,16 +302,16 @@ func (c *Controller) refreshSystemVars() {
|
||||
|
||||
func (c *Controller) persistVariables() error {
|
||||
toset := make(map[string]string)
|
||||
toset[DoltClusterRoleVariable] = string(c.role)
|
||||
toset[DoltClusterRoleEpochVariable] = strconv.Itoa(c.epoch)
|
||||
toset[dsess.DoltClusterRoleVariable] = string(c.role)
|
||||
toset[dsess.DoltClusterRoleEpochVariable] = strconv.Itoa(c.epoch)
|
||||
return c.persistentCfg.SetStrings(toset)
|
||||
}
|
||||
|
||||
func applyBootstrapClusterConfig(lgr *logrus.Logger, cfg Config, pCfg config.ReadWriteConfig) (Role, int, error) {
|
||||
toset := make(map[string]string)
|
||||
persistentRole := pCfg.GetStringOrDefault(DoltClusterRoleVariable, "")
|
||||
persistentRole := pCfg.GetStringOrDefault(dsess.DoltClusterRoleVariable, "")
|
||||
var roleFromPersistentConfig bool
|
||||
persistentEpoch := pCfg.GetStringOrDefault(DoltClusterRoleEpochVariable, "")
|
||||
persistentEpoch := pCfg.GetStringOrDefault(dsess.DoltClusterRoleEpochVariable, "")
|
||||
if persistentRole == "" {
|
||||
if cfg.BootstrapRole() != "" {
|
||||
lgr.Tracef("cluster/controller: persisted cluster role was empty, apply bootstrap_role %s", cfg.BootstrapRole())
|
||||
@@ -322,7 +320,7 @@ func applyBootstrapClusterConfig(lgr *logrus.Logger, cfg Config, pCfg config.Rea
|
||||
lgr.Trace("cluster/controller: persisted cluster role was empty, bootstrap_role was empty: defaulted to primary")
|
||||
persistentRole = "primary"
|
||||
}
|
||||
toset[DoltClusterRoleVariable] = persistentRole
|
||||
toset[dsess.DoltClusterRoleVariable] = persistentRole
|
||||
} else {
|
||||
roleFromPersistentConfig = true
|
||||
lgr.Tracef("cluster/controller: persisted cluster role is %s", persistentRole)
|
||||
@@ -330,19 +328,19 @@ func applyBootstrapClusterConfig(lgr *logrus.Logger, cfg Config, pCfg config.Rea
|
||||
if persistentEpoch == "" {
|
||||
persistentEpoch = strconv.Itoa(cfg.BootstrapEpoch())
|
||||
lgr.Tracef("cluster/controller: persisted cluster role epoch is empty, took boostrap_epoch: %s", persistentEpoch)
|
||||
toset[DoltClusterRoleEpochVariable] = persistentEpoch
|
||||
toset[dsess.DoltClusterRoleEpochVariable] = persistentEpoch
|
||||
} else {
|
||||
lgr.Tracef("cluster/controller: persisted cluster role epoch is %s", persistentEpoch)
|
||||
}
|
||||
if persistentRole != string(RolePrimary) && persistentRole != string(RoleStandby) {
|
||||
isallowed := persistentRole == string(RoleDetectedBrokenConfig) && roleFromPersistentConfig
|
||||
if !isallowed {
|
||||
return "", 0, fmt.Errorf("persisted role %s.%s = %s must be \"primary\" or \"secondary\"", PersistentConfigPrefix, DoltClusterRoleVariable, persistentRole)
|
||||
return "", 0, fmt.Errorf("persisted role %s.%s = %s must be \"primary\" or \"secondary\"", PersistentConfigPrefix, dsess.DoltClusterRoleVariable, persistentRole)
|
||||
}
|
||||
}
|
||||
epochi, err := strconv.Atoi(persistentEpoch)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("persisted role epoch %s.%s = %s must be an integer", PersistentConfigPrefix, DoltClusterRoleEpochVariable, persistentEpoch)
|
||||
return "", 0, fmt.Errorf("persisted role epoch %s.%s = %s must be an integer", PersistentConfigPrefix, dsess.DoltClusterRoleEpochVariable, persistentEpoch)
|
||||
}
|
||||
if len(toset) > 0 {
|
||||
err := pCfg.SetStrings(toset)
|
||||
|
||||
@@ -86,10 +86,47 @@ func doDoltGC(ctx *sql.Context, args []string) (int, error) {
|
||||
return cmdFailure, err
|
||||
}
|
||||
} else {
|
||||
// Currently, if this server is involved in cluster
|
||||
// replication, a full GC is only safe to run on the primary.
|
||||
// We assert that we are the primary here before we begin, and
|
||||
// we assert again that we are the primary at the same epoch as
|
||||
// we establish the safepoint.
|
||||
|
||||
origepoch := -1
|
||||
if _, role, ok := sql.SystemVariables.GetGlobal(dsess.DoltClusterRoleVariable); ok {
|
||||
// TODO: magic constant...
|
||||
if role.(string) != "primary" {
|
||||
return cmdFailure, fmt.Errorf("cannot run a full dolt_gc() while cluster replication is enabled and role is %s; must be the primary", role.(string))
|
||||
}
|
||||
_, epoch, ok := sql.SystemVariables.GetGlobal(dsess.DoltClusterRoleEpochVariable)
|
||||
if !ok {
|
||||
return cmdFailure, fmt.Errorf("internal error: cannot run a full dolt_gc(); cluster replication is enabled but could not read %s", dsess.DoltClusterRoleEpochVariable)
|
||||
}
|
||||
origepoch = epoch.(int)
|
||||
}
|
||||
|
||||
// TODO: If we got a callback at the beginning and an
|
||||
// (allowed-to-block) callback at the end, we could more
|
||||
// gracefully tear things down.
|
||||
err = ddb.GC(ctx, func() error {
|
||||
if origepoch != -1 {
|
||||
// Here we need to sanity check role and epoch.
|
||||
if _, role, ok := sql.SystemVariables.GetGlobal(dsess.DoltClusterRoleVariable); ok {
|
||||
if role.(string) != "primary" {
|
||||
return fmt.Errorf("dolt_gc failed: when we began we were a primary in a cluster, but now our role is %s", role.(string))
|
||||
}
|
||||
_, epoch, ok := sql.SystemVariables.GetGlobal(dsess.DoltClusterRoleEpochVariable)
|
||||
if !ok {
|
||||
return fmt.Errorf("dolt_gc failed: when we began we were a primary in a cluster, but we can no longer read the cluster role epoch.")
|
||||
}
|
||||
if origepoch != epoch.(int) {
|
||||
return fmt.Errorf("dolt_gc failed: when we began we were primary in the cluster at epoch %d, but now we are at epoch %d. for gc to safely finalize, our role and epoch must not change throughout the gc.", origepoch, epoch.(int))
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("dolt_gc failed: when we began we were a primary in a cluster, but we can no longer read the cluster role.")
|
||||
}
|
||||
}
|
||||
|
||||
killed := make(map[uint32]struct{})
|
||||
processes := ctx.ProcessList.Processes()
|
||||
for _, p := range processes {
|
||||
|
||||
@@ -50,6 +50,9 @@ const (
|
||||
AwsCredsProfile = "aws_credentials_profile"
|
||||
AwsCredsRegion = "aws_credentials_region"
|
||||
ShowBranchDatabases = "dolt_show_branch_databases"
|
||||
|
||||
DoltClusterRoleVariable = "dolt_cluster_role"
|
||||
DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch"
|
||||
)
|
||||
|
||||
const URLTemplateDatabasePlaceholder = "{database}"
|
||||
|
||||
@@ -1022,4 +1022,79 @@ tests:
|
||||
- on: server1
|
||||
queries:
|
||||
- exec: 'use repo1'
|
||||
- exec: 'call dolt_checkout("new_branch_name")'
|
||||
- exec: 'call dolt_checkout("new_branch_name")'
|
||||
- name: call dolt gc
|
||||
multi_repos:
|
||||
- name: server1
|
||||
with_files:
|
||||
- name: server.yaml
|
||||
contents: |
|
||||
log_level: trace
|
||||
listener:
|
||||
host: 0.0.0.0
|
||||
port: 3309
|
||||
cluster:
|
||||
standby_remotes:
|
||||
- name: standby
|
||||
remote_url_template: http://localhost:3852/{database}
|
||||
bootstrap_role: primary
|
||||
bootstrap_epoch: 1
|
||||
remotesapi:
|
||||
port: 3851
|
||||
server:
|
||||
args: ["--config", "server.yaml"]
|
||||
port: 3309
|
||||
- name: server2
|
||||
with_files:
|
||||
- name: server.yaml
|
||||
contents: |
|
||||
log_level: trace
|
||||
listener:
|
||||
host: 0.0.0.0
|
||||
port: 3310
|
||||
cluster:
|
||||
standby_remotes:
|
||||
- name: standby
|
||||
remote_url_template: http://localhost:3851/{database}
|
||||
bootstrap_role: standby
|
||||
bootstrap_epoch: 1
|
||||
remotesapi:
|
||||
port: 3852
|
||||
server:
|
||||
args: ["--config", "server.yaml"]
|
||||
port: 3310
|
||||
connections:
|
||||
- on: server1
|
||||
queries:
|
||||
- exec: 'create database repo1'
|
||||
- exec: 'use repo1'
|
||||
- exec: 'create table vals (id int primary key, val int)'
|
||||
- exec: 'insert into vals values (1,1)'
|
||||
- exec: 'insert into vals values (2,2)'
|
||||
- exec: 'insert into vals values (3,3)'
|
||||
- exec: 'insert into vals values (4,4)'
|
||||
- exec: 'call dolt_gc()'
|
||||
- exec: 'select * from vals'
|
||||
error_match: "this connection can no longer be used"
|
||||
- on: server1
|
||||
queries:
|
||||
- query: "select `database`, standby_remote, role, epoch, replication_lag_millis, current_error from dolt_cluster.dolt_cluster_status order by `database` asc"
|
||||
result:
|
||||
columns: ["database","standby_remote","role","epoch","replication_lag_millis","current_error"]
|
||||
rows:
|
||||
- ["repo1","standby","primary","1","0","NULL"]
|
||||
retry_attempts: 100
|
||||
- on: server2
|
||||
queries:
|
||||
- exec: 'use repo1'
|
||||
- query: "select * from vals order by id asc"
|
||||
result:
|
||||
columns: ["id","val"]
|
||||
rows:
|
||||
- [1,1]
|
||||
- [2,2]
|
||||
- [3,3]
|
||||
- [4,4]
|
||||
- exec: 'call dolt_gc()'
|
||||
error_match: "must be the primary"
|
||||
- exec: 'call dolt_gc("--shallow")'
|
||||
|
||||
Reference in New Issue
Block a user