Implement dolt admin createchunk commit

This commit is contained in:
Nick Tobey
2025-04-21 22:29:56 -07:00
parent d4b9a44f22
commit b18122e627
8 changed files with 530 additions and 2 deletions
+13
View File
@@ -313,6 +313,19 @@ func CreateReflogArgParser() *argparser.ArgParser {
return ap
}
// CreateCreateCommitParser builds the argument parser for "createchunk commit". It accepts no positional
// arguments and registers the following options:
//
//   - the author option (AuthorParam): an explicit commit author
//   - desc: the commit description
//   - the force flag (ForceFlag): allows overwriting an existing branch when used with the branch option
//   - root: the root hash of the database at the new commit (registered as a required string)
//   - parents: a list of parent commit hashes
//   - the branch option (BranchParam): a ref at which the new commit will be reachable
func CreateCreateCommitParser() *argparser.ArgParser {
	parser := argparser.NewArgParserWithMaxArgs("createchunk commit", 0)

	// Commit metadata.
	parser.SupportsString(AuthorParam, "", "author", "Specify an explicit author using the standard A U Thor {{.LessThan}}author@example.com{{.GreaterThan}} format.")
	parser.SupportsString("desc", "", "commit description", "the description in the commit")

	// Registration order is preserved from the original so that any generated usage text is unchanged.
	parser.SupportsFlag(ForceFlag, "", "when used alongside --branch, allows overwriting an existing branch")

	// Commit contents and position in the commit graph.
	parser.SupportsRequiredString("root", "", "database root", "the root hash of the database at this commit")
	parser.SupportsStringList("parents", "", "parent commits", "a list of the commit hashes of the parent commit")
	parser.SupportsString(BranchParam, "", "ref to assign to", "if set, the new commit will be reachable at this ref")

	return parser
}
func CreateGlobalArgParser(name string) *argparser.ArgParser {
ap := argparser.NewArgParserWithVariableArgs(name)
if name == "dolt" {
+2
View File
@@ -16,6 +16,7 @@ package admin
import (
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/cmd/dolt/commands/admin/createchunk"
)
var Commands = cli.NewHiddenSubCommandHandler("admin", "Commands for directly working with Dolt storage for purposes of testing or database recovery", []cli.Command{
@@ -23,4 +24,5 @@ var Commands = cli.NewHiddenSubCommandHandler("admin", "Commands for directly wo
ShowRootCmd{},
ZstdCmd{},
StorageCmd{},
createchunk.Commands,
})
@@ -0,0 +1,23 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package createchunk
import (
"github.com/dolthub/dolt/go/cmd/dolt/cli"
)
// Commands is the hidden "createchunk" sub-command handler. It groups the low-level commands for creating
// chunks; currently the only one is CreateCommitCmd.
var Commands = cli.NewHiddenSubCommandHandler("createchunk", "Low-level commands for creating chunks", []cli.Command{
CreateCommitCmd{},
})
@@ -0,0 +1,170 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package createchunk
import (
"bytes"
"context"
"github.com/dolthub/dolt/go/cmd/dolt/errhand"
"github.com/dolthub/go-mysql-server/sql"
"github.com/gocraft/dbr/v2"
"github.com/gocraft/dbr/v2/dialect"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/utils/argparser"
)
// CreateCommitCmd creates a new commit chunk, printing the new chunk's hash on success.
//
// When invoked from the CLI, the user must supply a branch name, and that branch will be set to the new commit.
// The branch is optional when invoking the equivalent stored procedure. The CLI requires it because the journal
// must end with a root hash and is only flushed when there is a new root hash: the root hash must therefore be
// updated before the command finishes, or else the changes will not be persisted.
type CreateCommitCmd struct{}
// generateCreateCommitSQL translates parsed CLI arguments into a CALL statement for the
// DOLT_ADMIN_CREATECHUNK_COMMIT stored procedure.
//
// Option names are written directly into the statement as quoted '--name' literals. Option values are written as
// "?" placeholders and returned, in the same order, in params so that the caller can interpolate them.
//
// The author is always sent and always comes first: it is taken from the author option when present, and is
// otherwise assembled as "name <email>" from env.GetNameAndEmail on the CLI context's config. The desc, root,
// parents and branch options are forwarded only when set, and the force flag is forwarded (without a value) only
// when present. The only error returned is a failure of that name/email lookup.
func generateCreateCommitSQL(cliCtx cli.CliContext, apr *argparser.ArgParseResults) (query string, params []interface{}, err error) {
	var stmt bytes.Buffer
	stmt.WriteString("CALL DOLT_ADMIN_CREATECHUNK_COMMIT(")

	// needsSeparator becomes true as soon as anything has been written to the argument list.
	needsSeparator := false

	// appendName writes the quoted option name, preceded by ", " for every argument after the first.
	appendName := func(name string) {
		if needsSeparator {
			stmt.WriteString(", ")
		}
		stmt.WriteString("'--" + name + "'")
		needsSeparator = true
	}

	// appendOption writes an option name followed by a placeholder, and records the value to bind to it.
	appendOption := func(name, value string) {
		appendName(name)
		stmt.WriteString(", ?")
		params = append(params, value)
	}

	// appendIfSet forwards an option from the parse results only when the user supplied it.
	appendIfSet := func(name string) {
		if value, found := apr.GetValue(name); found {
			appendOption(name, value)
		}
	}

	var author string
	if !apr.Contains(cli.AuthorParam) {
		name, email, lookupErr := env.GetNameAndEmail(cliCtx.Config())
		if lookupErr != nil {
			return "", nil, lookupErr
		}
		author = name + " <" + email + ">"
	} else {
		author, _ = apr.GetValue(cli.AuthorParam)
	}
	appendOption(cli.AuthorParam, author)

	for _, name := range []string{"desc", "root", "parents", cli.BranchParam} {
		appendIfSet(name)
	}

	if apr.Contains(cli.ForceFlag) {
		appendName(cli.ForceFlag)
	}

	stmt.WriteString(")")
	return stmt.String(), params, nil
}
// Name returns the name of the Dolt cli command. This is what is used on the command line to invoke the command
func (cmd CreateCommitCmd) Name() string {
return "commit"
}
// Description returns a short, human-readable description of the command.
func (cmd CreateCommitCmd) Description() string {
return "Creates a new commit chunk in the dolt storage"
}
// RequiresRepo should return false if this interface is implemented, and the command does not have the requirement
// that it be run from within a data repository directory. This command returns false.
func (cmd CreateCommitCmd) RequiresRepo() bool {
return false
}
// Docs returns the documentation for this command. It always returns nil, because admin commands are
// undocumented.
func (cmd CreateCommitCmd) Docs() *cli.CommandDocumentation {
// Admin commands are undocumented
return nil
}
// ArgParser returns the argument parser for this command, as built by cli.CreateCreateCommitParser.
func (cmd CreateCommitCmd) ArgParser() *argparser.ArgParser {
return cli.CreateCreateCommitParser()
}
// Exec runs the command: it parses args, builds a CALL to the DOLT_ADMIN_CREATECHUNK_COMMIT stored procedure,
// runs it through the query engine obtained from cliCtx, and prints the first column of the first returned row
// (the new commit's hash).
//
// It returns 0 on success and 1 on any failure: a missing branch option, a failure to obtain the query engine,
// to build or interpolate the query, to execute it, to read its rows, or a result that contains no value.
func (cmd CreateCommitCmd) Exec(ctx context.Context, commandStr string, args []string, _ *env.DoltEnv, cliCtx cli.CliContext) int {
	ap := cmd.ArgParser()
	usage, _ := cli.HelpAndUsagePrinters(cli.CommandDocsForCommandString(commandStr, cli.CommandDocumentationContent{}, ap))

	// Ensure that the CLI args parse, but only check that a branch was supplied.
	// All other args will be validated in the system procedure, but the branch is only required in the CLI.
	apr := cli.ParseArgsOrDie(ap, args, usage)
	if !apr.Contains(cli.BranchParam) {
		// Terminate the message with a newline so it does not run into whatever is printed next.
		cli.PrintErrf("the --%s flag is required when creating a chunk using the CLI\n", cli.BranchParam)
		return 1
	}

	queryist, sqlCtx, closeFunc, err := cliCtx.QueryEngine(ctx)
	if err != nil {
		cli.PrintErrln(errhand.VerboseErrorFromError(err))
		return 1
	}
	if closeFunc != nil {
		defer closeFunc()
	}

	querySql, params, err := generateCreateCommitSQL(cliCtx, apr)
	if err != nil {
		cli.PrintErrln(errhand.VerboseErrorFromError(err))
		return 1
	}

	// The query engine takes a plain SQL string, so bind the placeholder values client-side.
	interpolatedQuery, err := dbr.InterpolateForDialect(querySql, params, dialect.MySQL)
	if err != nil {
		cli.PrintErrln(errhand.VerboseErrorFromError(err))
		return 1
	}

	_, rowIter, _, err := queryist.Query(sqlCtx, interpolatedQuery)
	if err != nil {
		cli.PrintErrln(errhand.VerboseErrorFromError(err))
		return 1
	}

	rows, err := sql.RowIterToRows(sqlCtx, rowIter)
	if err != nil {
		cli.PrintErrln(errhand.VerboseErrorFromError(err))
		return 1
	}

	// Guard the index below: an empty result would otherwise panic instead of reporting a failure.
	if len(rows) == 0 || len(rows[0]) == 0 {
		cli.PrintErrln("DOLT_ADMIN_CREATECHUNK_COMMIT did not return a commit hash")
		return 1
	}

	cli.Println(rows[0][0])
	return 0
}
-2
View File
@@ -42,7 +42,6 @@ import (
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/cmd/dolt/commands"
"github.com/dolthub/dolt/go/cmd/dolt/commands/admin"
"github.com/dolthub/dolt/go/cmd/dolt/commands/ci"
"github.com/dolthub/dolt/go/cmd/dolt/commands/credcmds"
"github.com/dolthub/dolt/go/cmd/dolt/commands/cvcmds"
@@ -71,7 +70,6 @@ var dumpDocsCommand = &commands.DumpDocsCmd{}
var dumpZshCommand = &commands.GenZshCompCmd{}
var commandsWithoutCliCtx = []cli.Command{
admin.Commands,
commands.CloneCmd{},
commands.BackupCmd{},
commands.LoginCmd{},
@@ -0,0 +1,138 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package admin
import (
"errors"
"fmt"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/store/datas"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/go-mysql-server/sql"
)
// rowToIter wraps the given values in a single sql.Row and returns a sql.RowIter that yields just that row.
func rowToIter(vals ...interface{}) sql.RowIter {
	single := make(sql.Row, len(vals))
	copy(single, vals)
	return sql.RowsToRowIter(single)
}
// CreateCommit implements the dolt_admin_createchunk_commit stored procedure. It creates a new commit in the
// current database from an existing root value hash and an explicit list of parents, and returns a single row
// containing the new commit's hash.
//
// args are parsed with cli.CreateCreateCommitParser:
//   - root: hash of the root value the commit should point at
//   - parents: commit specs for the parent commits, each resolved relative to the session's current head ref
//   - desc: the commit description
//   - author: explicit author; when omitted the SQL client's user is used as the name and user@address as the
//     email
//   - branch: when set, the branch is pointed at the new commit; otherwise the commit is created dangling
//   - force: allows the branch to be moved even when the new commit does not descend from the branch's
//     current commit
//
// An error is returned when there is no current database, the write-access check fails, the arguments do not
// parse, the database cannot be loaded, the root hash is malformed or does not resolve to a value, a parent
// cannot be resolved, or the commit itself fails.
func CreateCommit(ctx *sql.Context, args ...string) (sql.RowIter, error) {
	dbName := ctx.GetCurrentDatabase()
	if len(dbName) == 0 {
		return nil, fmt.Errorf("empty database name")
	}
	if err := branch_control.CheckAccess(ctx, branch_control.Permissions_Write); err != nil {
		return nil, err
	}

	apr, err := cli.CreateCreateCommitParser().Parse(args)
	if err != nil {
		return nil, err
	}

	// Missing optional values are left as their zero values; the root is validated when it is parsed below.
	desc, _ := apr.GetValue("desc")
	root, _ := apr.GetValue("root")
	parents, _ := apr.GetValueList("parents")
	branch, isBranchSet := apr.GetValue(cli.BranchParam)
	force := apr.Contains(cli.ForceFlag)

	var name, email string
	if authorStr, ok := apr.GetValue(cli.AuthorParam); ok {
		name, email, err = cli.ParseAuthor(authorStr)
		if err != nil {
			return nil, err
		}
	} else {
		// In SQL mode, use the current SQL user as the commit author, instead of the `dolt config` configured values.
		// We won't have an email address for the SQL user though, so instead use the MySQL user@address notation.
		name = ctx.Client().User
		email = fmt.Sprintf("%s@%s", ctx.Client().User, ctx.Client().Address)
	}

	dSess := dsess.DSessFromSess(ctx.Session)
	dbData, ok := dSess.GetDbData(ctx, dbName)
	if !ok {
		// Without this check a missing database would surface as a nil dereference on dbData.Ddb.
		return nil, fmt.Errorf("could not load database %s", dbName)
	}
	db := dbData.Ddb

	commitRootHash, ok := hash.MaybeParse(root)
	if !ok {
		return nil, fmt.Errorf("invalid root value hash")
	}

	var parentCommits []hash.Hash
	if len(parents) > 0 {
		// The head ref does not change between parents, so look it up once rather than per iteration.
		headRef, err := dSess.CWBHeadRef(ctx, dbName)
		if err != nil {
			return nil, err
		}

		parentCommits = make([]hash.Hash, 0, len(parents))
		for _, parent := range parents {
			commitSpec, err := doltdb.NewCommitSpec(parent)
			if err != nil {
				return nil, err
			}

			optionalCommit, err := db.Resolve(ctx, commitSpec, headRef)
			if err != nil {
				return nil, err
			}
			parentCommits = append(parentCommits, optionalCommit.Addr)
		}
	}

	commitMeta, err := datas.NewCommitMeta(name, email, desc)
	if err != nil {
		return nil, err
	}

	// This isn't technically an amend, but the Amend field controls whether the commit must be a child of the ref's current commit (if any)
	commitOpts := datas.CommitOptions{
		Parents: parentCommits,
		Meta:    commitMeta,
		Amend:   force,
	}

	rootVal, err := db.ValueReadWriter().ReadValue(ctx, commitRootHash)
	if err != nil {
		return nil, err
	}
	if rootVal == nil {
		// A well-formed hash that matches nothing in storage must not be committed as a root.
		return nil, fmt.Errorf("no value found for root hash %s", root)
	}

	var commit *doltdb.Commit
	if isBranchSet {
		commit, err = db.CommitValue(ctx, ref.NewBranchRef(branch), rootVal, commitOpts)
		if errors.Is(err, datas.ErrMergeNeeded) {
			return nil, fmt.Errorf("branch %s already exists. If you wish to overwrite it, add the --force flag", branch)
		}
	} else {
		commit, err = db.CommitDangling(ctx, rootVal, commitOpts)
	}
	if err != nil {
		return nil, err
	}

	commitHash, err := commit.HashOf()
	if err != nil {
		return nil, err
	}

	return rowToIter(commitHash.String()), nil
}
@@ -15,6 +15,7 @@
package dprocedures
import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures/admin"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
)
@@ -57,6 +58,8 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{
{Name: "dolt_stats_once", Schema: statsFuncSchema, Function: statsFunc(statsOnce)},
{Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)},
{Name: "dolt_stats_timers", Schema: statsFuncSchema, Function: statsFunc(statsTimers)},
{Name: "dolt_admin_createchunk_commit", Schema: stringSchema("hash"), Function: admin.CreateCommit},
}
// stringSchema returns a non-nullable schema with all columns as LONGTEXT.
+181
View File
@@ -0,0 +1,181 @@
#! /usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# setup runs before every test. It records the hashes of an initial state and of a later state with two
# additional commits, so that tests can build new commits out of them:
#   initialCommitHash / rootValueHash    - commit hash and database hash before any table is created
#   newCommitHash     / newRootValueHash - commit hash and database hash after the two commits below
setup() {
setup_common
# get the current root value
dolt branch initial
initialCommitHash=$(dolt sql -q "select dolt_hashof('initial')" -r csv | tail -n 1)
rootValueHash=$(dolt sql -q "SELECT dolt_hashof_db();" -r csv | tail -n 1)
# create a new commit with some changes
dolt sql -q "CREATE TABLE test_table(pk INT PRIMARY KEY);"
dolt commit -Am "create table"
dolt sql -q "INSERT INTO test_table VALUES (1);"
dolt commit -Am "insert into table"
# get the updated root value
newCommitHash=$(dolt sql -q "select dolt_hashof('HEAD')" -r csv | tail -n 1)
newRootValueHash=$(dolt sql -q "SELECT dolt_hashof_db();" -r csv | tail -n 1)
}
# teardown runs after every test and delegates to the shared teardown_common helper.
teardown() {
teardown_common
}
# Creates a commit from the CLI whose root is the latest database state and whose only parent is
# refs/internal/create, then checks the resulting branch's metadata, diff and history.
@test "createchunk: create commit in CLI on new branch" {
# create a new branch that flattens the commit history
flattenedCommitHash=$(dolt admin createchunk commit --root "$newRootValueHash" \
--author "a <b@c.com>" --desc "flattened history" --parents "refs/internal/create" --branch newBranch)
run dolt show newBranch
[ "$status" -eq 0 ]
[[ "$output" =~ "Author: a <b@c.com>" ]] || false
[[ "$output" =~ "flattened history" ]] || false
# Check that this commit contains both the table creation and insert
[[ "$output" =~ "added table" ]] || false
[[ "$output" =~ "| + | 1 |" ]] || false
# check that there are only two commits in the history
run dolt log newBranch
[ "$status" -eq 0 ]
[ "$(echo "$output" | grep -c commit)" -eq 2 ]
[[ "$output" =~ "Initialize data repository" ]] || false
[[ "$output" =~ "flattened history" ]] || false
}
# Same scenario as creating a commit on a new branch, but with --force added: the assertions are identical,
# showing that --force changes nothing when the branch does not exist yet.
@test "createchunk: force is a no-op when used on a new branch" {
# create a new branch that flattens the commit history
flattenedCommitHash=$(dolt admin createchunk commit --root "$newRootValueHash" \
--author "a <b@c.com>" --desc "flattened history" --parents "refs/internal/create" --branch newBranch --force)
run dolt show newBranch
[ "$status" -eq 0 ]
[[ "$output" =~ "Author: a <b@c.com>" ]] || false
[[ "$output" =~ "flattened history" ]] || false
[[ "$output" =~ "added table" ]] || false
[[ "$output" =~ "| + | 1 |" ]] || false
# check that there are only two commits in the history
run dolt log newBranch
[ "$status" -eq 0 ]
[ "$(echo "$output" | grep -c commit)" -eq 2 ]
[[ "$output" =~ "Initialize data repository" ]] || false
[[ "$output" =~ "flattened history" ]] || false
}
# Omits --author and checks that the commit is attributed to "Bats Tests <bats@email.fake>"
# (presumably the identity configured by setup_common - confirm in helper/common.bash), and that the hash
# printed by the command is the one shown for the branch.
@test "createchunk: use default author when none is specified" {
flattenedCommitHash=$(dolt admin createchunk commit --root "$newRootValueHash" --desc "flattened history" \
--parents "refs/internal/create" --branch newBranch)
run dolt show newBranch
[ "$status" -eq 0 ]
[[ "$output" =~ "Author: Bats Tests <bats@email.fake>" ]] || false
[[ "$output" =~ "$flattenedCommitHash" ]] || false
}
# Creates a commit with two parents (the initial commit and the latest commit) and checks that `dolt show`
# reports it as a merge of exactly those two commits, under the hash the command printed.
@test "createchunk: commit with multiple parents" {
    # Printed so that the parent list is visible in the test log when an assertion fails.
    echo "$initialCommitHash,$newCommitHash"
    flattenedCommitHash=$(dolt admin createchunk commit --root "$newRootValueHash" --desc "flattened history" \
        --parents "$initialCommitHash,$newCommitHash" --branch mergeBranch)
    run dolt show mergeBranch
    echo "$output"
    # Fail on a non-zero exit instead of relying only on the output matches below.
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Merge: $initialCommitHash $newCommitHash" ]] || false
    [[ "$output" =~ "$flattenedCommitHash" ]] || false
}
# Exercises the three outcomes of targeting a branch that already exists:
#   1. the new commit has the branch's current commit as a parent -> allowed
#   2. the new commit has an unrelated parent                      -> rejected with a hint to use --force
#   3. the same as (2) but with --force                             -> allowed, and the branch is overwritten
@test "createchunk: create commit in CLI on existing branch" {
dolt branch existingBranch "$initialCommitHash"
# overwriting an existing branch is allowed if the current commit is a parent of the new one
flattenedCommitHash=$(dolt admin createchunk commit --root "$newRootValueHash" --desc "flattened history" \
--parents "$initialCommitHash" --branch existingBranch)
run dolt log existingBranch
[ "$status" -eq 0 ]
[ "$(echo "$output" | grep -c commit)" -eq 2 ]
[[ "$output" =~ "Initialize data repository" ]] || false
[[ "$output" =~ "flattened history" ]] || false
# but overwriting an existing branch with a different history is an error
run dolt admin createchunk commit --root "$newRootValueHash" --desc "flattened history" --parents "$newCommitHash" --branch existingBranch
[ "$status" -eq 1 ]
[[ "$output" =~ "branch existingBranch already exists. If you wish to overwrite it, add the --force flag" ]] || false
# but we can make it succeed with --force, overwriting the branch
overwrittenCommitHash=$(dolt admin createchunk commit --root "$newRootValueHash" --desc "overwritten desc" \
--parents "$newCommitHash" --branch existingBranch --force)
run dolt log existingBranch
[ "$status" -eq 0 ]
# the history is now the three original commits plus the new one; the earlier "flattened history" commit is gone
[ "$(echo "$output" | grep -c commit)" -eq 4 ]
[[ "$output" =~ "Initialize data repository" ]] || false
[[ "$output" =~ "create table" ]] || false
[[ "$output" =~ "insert into table" ]] || false
[[ "$output" =~ "overwritten desc" ]] || false
[[ ! "$output" =~ "flattened history" ]] || false
}
# The CLI command requires --branch: without it the command must exit with status 1 and explain why.
@test "createchunk: attempt to create commit in CLI with no provided branch" {
run dolt admin createchunk commit --root "$newRootValueHash" --desc "flattened history" --parents "$initialCommitHash,$newCommitHash"
[ "$status" -eq 1 ]
[[ "$output" =~ "the --branch flag is required when creating a chunk using the CLI" ]] || false
}
# Calls the stored procedure directly, with --force, to point an existing branch at a new commit whose only
# parent is refs/internal/create, then checks the branch's metadata, diff and history.
@test "createchunk: create commit in SQL on existing branch" {
# existingBranch starts at the current HEAD, i.e. the "insert into table" commit made in setup
dolt branch existingBranch
run dolt sql -q "CALL DOLT_ADMIN_CREATECHUNK_COMMIT('--root', '$newRootValueHash', '--author', 'a <b@c.com>', \
'--desc', 'flattened history', '--parents', 'refs/internal/create', '--branch', 'existingBranch', '--force');" -r csv
echo "$output"
[ "$status" -eq 0 ]
# the last line of the csv output is the returned hash
flattenedCommitHash=$(echo "$output" | tail -n 1)
run dolt show existingBranch
echo "$output"
[ "$status" -eq 0 ]
[[ "$output" =~ "Author: a <b@c.com>" ]] || false
[[ "$output" =~ "flattened history" ]] || false
[[ "$output" =~ "added table" ]] || false
[[ "$output" =~ "| + | 1 |" ]] || false
# check that there are only two commits in the history
run dolt log existingBranch
[ "$status" -eq 0 ]
[ "$(echo "$output" | grep -c commit)" -eq 2 ]
[[ "$output" =~ "Initialize data repository" ]] || false
[[ "$output" =~ "flattened history" ]] || false
}
# Calls the stored procedure directly to create a commit on a branch that does not exist yet, then checks the
# new branch's metadata, diff and history.
@test "createchunk: create commit in SQL on new branch" {
# the last line of the csv output is the returned hash
flattenedCommitHash=$(dolt sql -q "CALL DOLT_ADMIN_CREATECHUNK_COMMIT('--root', '$newRootValueHash', '--author', 'a <b@c.com>', '--desc',\
'flattened history', '--parents', 'refs/internal/create', '--branch', 'newBranch');" -r csv | tail -n 1)
run dolt show newBranch
echo "$output"
[ "$status" -eq 0 ]
[[ "$output" =~ "Author: a <b@c.com>" ]] || false
[[ "$output" =~ "flattened history" ]] || false
[[ "$output" =~ "added table" ]] || false
[[ "$output" =~ "| + | 1 |" ]] || false
# check that there are only two commits in the history
run dolt log newBranch
[ "$status" -eq 0 ]
[ "$(echo "$output" | grep -c commit)" -eq 2 ]
[[ "$output" =~ "Initialize data repository" ]] || false
[[ "$output" =~ "flattened history" ]] || false
}
# Runs the stored procedure and a query of dolt_log in the same SQL session, then checks that dolt_log still
# lists only the three commits made before the procedure call.
# NOTE(review): the test name says no branch is provided, but the CALL below passes '--branch', 'newBranch'.
# Confirm which is intended; as written the branch-less path of the procedure is not exercised here.
@test "createchunk: create commit in SQL with no provided branch" {
run dolt sql -r csv <<SQL
CALL DOLT_ADMIN_CREATECHUNK_COMMIT('--root', '$newRootValueHash', '--author', 'a <b@c.com>', '--desc',
'flattened history', '--parents', '$initialCommitHash', '--branch', 'newBranch');
SELECT * from dolt_log;
SQL
[ "$status" -eq 0 ]
# Just capture the last four lines (the select)
run echo "$(echo "$output" | tail -n 4)"
echo "$output"
# header row followed by the three commits, newest first
[[ "${lines[0]}" =~ "commit_hash,committer,email,date,message" ]] || false
[[ "${lines[1]}" =~ "insert into table" ]] || false
[[ "${lines[2]}" =~ "create table" ]] || false
[[ "${lines[3]}" =~ "Initialize data repository" ]] || false
}