Merge pull request #10483 from dolthub/db/dbfactory

Add `git+*` dbfactory remotes with required `--git-cache-dir`, `--ref` support, and integration tests
This commit is contained in:
Dustin Brown
2026-02-11 13:25:05 -08:00
committed by GitHub
21 changed files with 1320 additions and 25 deletions
+3
View File
@@ -146,6 +146,7 @@ func CreateCloneArgParser() *argparser.ArgParser {
ap.SupportsString(RemoteParam, "", "name", "Name of the remote to be added to the cloned database. The default is 'origin'.")
ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.")
ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.")
ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).")
ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "")
ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes))
ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file.")
@@ -166,6 +167,7 @@ func CreateResetArgParser() *argparser.ArgParser {
// CreateRemoteArgParser builds the argument parser for the "remote" command. The parser accepts a variable
// number of positional arguments plus the --ref string option, which names the git ref used for git remotes.
func CreateRemoteArgParser() *argparser.ArgParser {
ap := argparser.NewArgParserWithVariableArgs("remote")
ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).")
return ap
}
@@ -266,6 +268,7 @@ func CreateBackupArgParser() *argparser.ArgParser {
ap.ArgListHelp = append(ap.ArgListHelp, [2]string{"profile", "AWS profile to use."})
ap.SupportsFlag(VerboseFlag, "v", "When printing the list of backups adds additional details.")
ap.SupportsFlag(ForceFlag, "f", "When restoring a backup, overwrite the contents of the existing database with the same name.")
ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).")
ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "")
ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes))
ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file")
+29 -5
View File
@@ -49,6 +49,18 @@ This default configuration is achieved by creating references to the remote bran
},
}
// remoteDialerWithGitCacheRoot wraps a GRPCDialProvider and additionally carries an explicit
// git cache root, exposed through GitCacheRoot.
type remoteDialerWithGitCacheRoot struct {
	dbfactory.GRPCDialProvider
	root string
}

// GitCacheRoot returns the configured root and true. When the root is empty or
// whitespace-only it returns "" and false. The root is returned untrimmed.
func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) {
	if hasRoot := strings.TrimSpace(d.root) != ""; hasRoot {
		return d.root, true
	}
	return "", false
}
type CloneCmd struct{}
// Name returns the name of the Dolt CLI command. This is what is used on the command line to invoke the command.
@@ -130,7 +142,11 @@ func clone(ctx context.Context, apr *argparser.ArgParseResults, dEnv *env.DoltEn
var r env.Remote
var srcDB *doltdb.DoltDB
r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv)
cloneRoot, err := dEnv.FS.Abs(dir)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv, cloneRoot)
if verr != nil {
return verr
}
@@ -187,15 +203,19 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE
urlStr := apr.Arg(0)
_, err := earl.Parse(urlStr)
if err != nil {
return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build()
if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok {
urlStr = normalized
} else {
return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build()
}
}
var dir string
if apr.NArg() == 2 {
dir = apr.Arg(1)
} else {
// Infer directory name from the URL.
dir = path.Base(urlStr)
if dir == "." {
dir = path.Dir(urlStr)
@@ -207,11 +227,15 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE
return dir, urlStr, nil
}
func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) {
func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv, cloneRoot string) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) {
cli.Printf("cloning %s\n", remoteUrl)
r := env.NewRemote(remoteName, remoteUrl, params)
ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dEnv)
dialer := dbfactory.GRPCDialProvider(dEnv)
if strings.TrimSpace(cloneRoot) != "" {
dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: dEnv, root: cloneRoot}
}
ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dialer)
if err != nil {
bdr := errhand.BuildDError("error: failed to get remote db").AddCause(err)
return env.NoRemote, nil, bdr.Build()
+11
View File
@@ -65,3 +65,14 @@ func TestParseDolthubRepos(t *testing.T) {
}
}
// TestCloneParseArgs_InferDir checks that parseArgs, given only a URL, infers the clone
// directory from the URL's final path element and returns the URL unchanged.
func TestCloneParseArgs_InferDir(t *testing.T) {
	const cloneURL = "https://example.com/org/repo.git"

	parser := CloneCmd{}.ArgParser()
	parsed, err := parser.Parse([]string{cloneURL})
	require.NoError(t, err)

	gotDir, gotURL, verr := parseArgs(parsed)
	require.Nil(t, verr)
	require.Equal(t, "repo.git", gotDir)
	require.Equal(t, cloneURL, gotURL)
}
+8 -2
View File
@@ -78,6 +78,7 @@ func (cmd ReadTablesCmd) ArgParser() *argparser.ArgParser {
{"table", " Optional tables to retrieve. If omitted, all tables are retrieved."},
}
ap.SupportsString(dirParamName, "d", "directory", "directory to create and put retrieved table data.")
ap.SupportsString(gitRefFlag, "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).")
return ap
}
@@ -99,7 +100,11 @@ func (cmd ReadTablesCmd) Exec(ctx context.Context, commandStr string, args []str
_, err := earl.Parse(urlStr)
if err != nil {
return HandleVErrAndExitCode(errhand.BuildDError("Invalid remote url").AddCause(err).Build(), usage)
if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok {
urlStr = normalized
} else {
return HandleVErrAndExitCode(errhand.BuildDError("Invalid remote url").AddCause(err).Build(), usage)
}
}
dir := apr.GetValueOrDefault(dirParamName, path.Base(urlStr))
@@ -203,7 +208,8 @@ func pullTableValue(ctx context.Context, dEnv *env.DoltEnv, srcDB *doltdb.DoltDB
}
func getRemoteDBAtCommit(ctx context.Context, remoteUrl string, remoteUrlParams map[string]string, commitStr string, dEnv *env.DoltEnv) (*doltdb.DoltDB, doltdb.RootValue, errhand.VerboseError) {
_, srcDB, verr := createRemote(ctx, "temp", remoteUrl, remoteUrlParams, dEnv)
cacheRoot, _ := dEnv.GitCacheRoot()
_, srcDB, verr := createRemote(ctx, "temp", remoteUrl, remoteUrlParams, dEnv, cacheRoot)
if verr != nil {
return nil, nil, verr
+31
View File
@@ -0,0 +1,31 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package commands
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestReadTablesArgParser_AcceptsGitFlags(t *testing.T) {
ap := ReadTablesCmd{}.ArgParser()
apr, err := ap.Parse([]string{
"--" + gitRefFlag, "refs/dolt/custom",
"git+file:///tmp/remote.git", "main",
})
require.NoError(t, err)
require.Equal(t, "refs/dolt/custom", apr.GetValueOrDefault(gitRefFlag, ""))
}
+26
View File
@@ -71,6 +71,7 @@ const (
addRemoteId = "add"
removeRemoteId = "remove"
removeRemoteShortId = "rm"
gitRefFlag = "ref"
)
type RemoteCmd struct{}
@@ -212,6 +213,11 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) (
err = cli.AddAWSParams(remoteUrl, apr, params)
case dbfactory.OSSScheme:
err = cli.AddOSSParams(remoteUrl, apr, params)
case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme:
verr := addGitRemoteParams(apr, params)
if verr != nil {
return nil, verr
}
default:
err = cli.VerifyNoAwsParams(apr)
}
@@ -219,9 +225,29 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) (
return nil, errhand.VerboseErrorFromError(err)
}
// Flags that are only meaningful for git remotes should not be accepted for other schemes.
switch scheme {
case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme:
default:
if _, ok := apr.GetValue(gitRefFlag); ok {
return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitRefFlag).Build()
}
}
return params, nil
}
// addGitRemoteParams copies git-remote-specific flags from apr into params.
//
// When the git ref flag was supplied, its whitespace-trimmed value is stored under
// dbfactory.GitRefParam; a value that is empty after trimming is rejected with an error.
// When the flag was not supplied, params is left untouched and nil is returned.
func addGitRemoteParams(apr *argparser.ArgParseResults, params map[string]string) errhand.VerboseError {
	raw, given := apr.GetValue(gitRefFlag)
	if !given {
		return nil
	}

	ref := strings.TrimSpace(raw)
	if ref == "" {
		return errhand.BuildDError("error: --%s cannot be empty", gitRefFlag).Build()
	}

	params[dbfactory.GitRefParam] = ref
	return nil
}
// callSQLRemoteAdd calls the SQL procedure `call dolt_remote('add', remoteName, remoteUrl)`.
func callSQLRemoteAdd(sqlCtx *sql.Context, queryist cli.Queryist, remoteName, remoteUrl string) error {
qry, err := dbr.InterpolateForDialect("call dolt_remote('add', ?, ?)", []interface{}{remoteName, remoteUrl}, dialect.MySQL)
+11
View File
@@ -21,6 +21,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/utils/config"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
@@ -135,3 +136,13 @@ func TestGetAbsRemoteUrl(t *testing.T) {
})
}
}
// TestParseRemoteArgs_GitRef checks that a --ref value given to `remote add` for a git+file
// URL ends up in the parsed remote params under dbfactory.GitRefParam.
func TestParseRemoteArgs_GitRef(t *testing.T) {
	const (
		remoteURL = "git+file:///tmp/remote.git"
		customRef = "refs/dolt/custom"
	)

	parser := RemoteCmd{}.ArgParser()
	parsed, err := parser.Parse([]string{"add", "origin", remoteURL, "--" + gitRefFlag, customRef})
	assert.NoError(t, err)

	got, verr := parseRemoteArgs(parsed, dbfactory.GitFileScheme, remoteURL)
	assert.Nil(t, verr)
	assert.Equal(t, customRef, got[dbfactory.GitRefParam])
}
+19 -9
View File
@@ -53,6 +53,12 @@ const (
OSSScheme = "oss"
// Git remote dbfactory schemes (Git remotes as Dolt remotes)
GitFileScheme = "git+file"
GitHTTPScheme = "git+http"
GitHTTPSScheme = "git+https"
GitSSHScheme = "git+ssh"
defaultScheme = HTTPSScheme
defaultMemTableSize = 256 * 1024 * 1024
)
@@ -69,15 +75,19 @@ type DBFactory interface {
// DBFactories is a map from url scheme name to DBFactory. Additional factories can be added to the DBFactories map
// from external packages.
var DBFactories = map[string]DBFactory{
AWSScheme: AWSFactory{},
OSSScheme: OSSFactory{},
GSScheme: GSFactory{},
OCIScheme: OCIFactory{},
FileScheme: FileFactory{},
MemScheme: MemFactory{},
LocalBSScheme: LocalBSFactory{},
HTTPScheme: NewDoltRemoteFactory(true),
HTTPSScheme: NewDoltRemoteFactory(false),
AWSScheme: AWSFactory{},
OSSScheme: OSSFactory{},
GSScheme: GSFactory{},
OCIScheme: OCIFactory{},
FileScheme: FileFactory{},
MemScheme: MemFactory{},
LocalBSScheme: LocalBSFactory{},
HTTPScheme: NewDoltRemoteFactory(true),
HTTPSScheme: NewDoltRemoteFactory(false),
GitFileScheme: GitRemoteFactory{},
GitHTTPScheme: GitRemoteFactory{},
GitHTTPSScheme: GitRemoteFactory{},
GitSSHScheme: GitRemoteFactory{},
}
// CreateDB creates a database based on the supplied urlStr, and creation params. The DBFactory used for creation is
@@ -0,0 +1,280 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package dbfactory
import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/dolthub/dolt/go/store/blobstore"
"github.com/dolthub/dolt/go/store/datas"
"github.com/dolthub/dolt/go/store/nbs"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/types"
)
const (
// GitCacheRootParam is the absolute path to the local Dolt repository root (the directory that contains `.dolt/`).
// Required for git remotes. GitRemoteFactory stores its local cache repo under:
// `<git_cache_root>/.dolt/git-remote-cache/<sha256(remoteURL|remoteRef)>/repo.git`.
GitCacheRootParam = "git_cache_root"
// GitRefParam is the params key for the git ref that holds the Dolt data. When the entry is missing,
// not a string, or blank, defaultGitRef is used instead.
GitRefParam = "git_ref"
// GitRemoteNameParam is the params key for the name of the git remote configured inside the local cache
// repo. When the entry is missing, not a string, or blank, defaultGitRemoteName is used instead.
GitRemoteNameParam = "git_remote_name"
// defaultGitRef is the ref used when GitRefParam does not supply one.
defaultGitRef = "refs/dolt/data"
// defaultGitRemoteName is the git remote name used when GitRemoteNameParam does not supply one.
defaultGitRemoteName = "origin"
)
// GitCacheRootProvider provides the local Dolt repo root for per-repo git remote caches.
// Implementations should return ok=false when no repo root is available.
type GitCacheRootProvider interface {
// GitCacheRoot returns the repo root path and true, or "" and false when no root is available.
GitCacheRoot() (string, bool)
}
// GitRemoteFactory opens a Dolt database backed by a Git remote, using a local bare
// repository as an object cache and remote configuration store.
//
// Supported schemes (registered in factory.go):
// - git+file
// - git+http
// - git+https
// - git+ssh
type GitRemoteFactory struct{}
// Compile-time assertion that GitRemoteFactory satisfies the DBFactory interface.
var _ DBFactory = GitRemoteFactory{}
// PrepareDB makes sure the target of a git+file URL exists, creating a bare git repository
// there when nothing is present at the path.
//
// Only the git+file scheme is supported; any other scheme yields an error. If the path already
// exists (whatever it is), PrepareDB returns nil without touching it. The nbf argument is unused.
func (fact GitRemoteFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) error {
	if strings.ToLower(urlObj.Scheme) != GitFileScheme {
		return fmt.Errorf("prepare not supported for scheme %q", urlObj.Scheme)
	}

	underlying, _, err := parseGitRemoteFactoryURL(urlObj, params)
	if err != nil {
		return err
	}
	if underlying.Scheme != "file" {
		return fmt.Errorf("git+file: expected underlying file URL, got %q", underlying.Scheme)
	}

	// The host component is joined in front of the path so that any host portion of the URL
	// becomes part of the filesystem location.
	repoPath := filepath.Join(underlying.Host, filepath.FromSlash(underlying.Path))
	if repoPath == "" {
		return fmt.Errorf("git+file: empty remote path")
	}

	_, statErr := os.Stat(repoPath)
	switch {
	case statErr == nil:
		// Something already lives at the path; leave it alone.
		return nil
	case !errors.Is(statErr, os.ErrNotExist):
		return statErr
	}
	return runGitInitBare(ctx, repoPath)
}
// CreateDB opens a database whose chunk store is backed by a git remote.
//
// The underlying git URL and data ref are derived from urlObj and params. A bare cache
// repository is created if needed under <cacheRoot>/<DoltDir>/git-remote-cache/, at the path
// computed by cacheRepoPath, and its git remote is pointed at the underlying URL before the
// store is opened. params must carry GitCacheRootParam; otherwise an error is returned.
func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) {
	underlying, dataRef, err := parseGitRemoteFactoryURL(urlObj, params)
	if err != nil {
		return nil, nil, nil, err
	}

	root, haveRoot, err := resolveGitCacheRoot(params)
	switch {
	case err != nil:
		return nil, nil, nil, err
	case !haveRoot:
		return nil, nil, nil, fmt.Errorf("%s is required for git remotes", GitCacheRootParam)
	}

	upstream := underlying.String()
	localRepo, err := cacheRepoPath(filepath.Join(root, DoltDir, "git-remote-cache"), upstream, dataRef)
	if err != nil {
		return nil, nil, nil, err
	}
	if err = ensureBareRepo(ctx, localRepo); err != nil {
		return nil, nil, nil, err
	}

	// The cache repo's git remote must exist and point at the underlying git URL.
	gitRemote := resolveGitRemoteName(params)
	if err = ensureGitRemoteURL(ctx, localRepo, gitRemote, upstream); err != nil {
		return nil, nil, nil, err
	}

	quota := nbs.NewUnlimitedMemQuotaProvider()
	store, err := nbs.NewGitStore(ctx, nbf.VersionString(), localRepo, dataRef, blobstore.GitBlobstoreOptions{RemoteName: gitRemote}, defaultMemTableSize, quota)
	if err != nil {
		return nil, nil, nil, err
	}

	vrw := types.NewValueStore(store)
	ns := tree.NewNodeStore(store)
	return datas.NewTypesDatabase(vrw, ns), vrw, ns, nil
}
// parseGitRemoteFactoryURL converts a git+<scheme> dbfactory URL into the underlying git remote
// URL and resolves the git ref to use.
//
// The returned URL is a copy of urlObj with the "git+" prefix removed from its (lowercased)
// scheme and with the query and fragment stripped. The ref comes from resolveGitRemoteRef.
// An error is returned when urlObj is nil, when the scheme does not start with "git+", or when
// nothing follows the "git+" prefix.
func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (remoteURL *url.URL, ref string, err error) {
	if urlObj == nil {
		return nil, "", fmt.Errorf("nil url")
	}

	scheme := strings.ToLower(urlObj.Scheme)
	if !strings.HasPrefix(scheme, "git+") {
		return nil, "", fmt.Errorf("expected git+ scheme, got %q", urlObj.Scheme)
	}
	underlyingScheme := strings.TrimPrefix(scheme, "git+")
	if underlyingScheme == "" {
		return nil, "", fmt.Errorf("invalid git+ scheme %q", urlObj.Scheme)
	}

	cp := *urlObj
	cp.Scheme = underlyingScheme
	cp.RawQuery = ""
	// url.Parse sets ForceQuery for an input ending in a bare '?', and URL.String then re-emits
	// that '?' even with an empty RawQuery. Clear it so "…/repo.git" and "…/repo.git?" map to
	// the same underlying URL (and therefore the same cache directory).
	cp.ForceQuery = false
	cp.Fragment = ""
	cp.RawFragment = ""
	return &cp, resolveGitRemoteRef(params), nil
}
// resolveGitRemoteRef returns the git ref to use for the remote: the trimmed GitRefParam
// entry of params (e.g. from `--ref`) when it is a non-blank string, otherwise defaultGitRef.
func resolveGitRemoteRef(params map[string]interface{}) string {
	// Indexing a nil map and asserting on a nil or non-string value both produce "", so
	// absent, nil, and mistyped entries all fall through to the default.
	explicit, _ := params[GitRefParam].(string)
	if trimmed := strings.TrimSpace(explicit); trimmed != "" {
		return trimmed
	}
	return defaultGitRef
}
// resolveGitRemoteName returns the git remote name to configure in the cache repo: the trimmed
// GitRemoteNameParam entry of params when it is a non-blank string, otherwise defaultGitRemoteName.
func resolveGitRemoteName(params map[string]interface{}) string {
	// A nil map, a missing key, a nil value, and a non-string value all leave configured == "".
	configured, _ := params[GitRemoteNameParam].(string)
	if trimmed := strings.TrimSpace(configured); trimmed != "" {
		return trimmed
	}
	return defaultGitRemoteName
}
// resolveGitCacheRoot reads GitCacheRootParam out of params and validates it.
//
// It returns ok=false with a nil error when params is nil, the key is missing, or the value
// is nil. A value that is not a string, or a string that is blank, is an error. On success the
// string is returned exactly as supplied (untrimmed).
func resolveGitCacheRoot(params map[string]interface{}) (root string, ok bool, err error) {
	// Reading from a nil map is safe and reports the key as absent.
	raw, present := params[GitCacheRootParam]
	if !present || raw == nil {
		return "", false, nil
	}

	switch val := raw.(type) {
	case string:
		if strings.TrimSpace(val) == "" {
			return "", false, fmt.Errorf("%s cannot be empty", GitCacheRootParam)
		}
		return val, true, nil
	default:
		return "", false, fmt.Errorf("%s must be a string", GitCacheRootParam)
	}
}
// cacheRepoPath returns the location of the local cache repository for a remote:
// <cacheBase>/<hex sha256 of "remoteURL|ref">/repo.git.
// A blank cacheBase is rejected with an error.
func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) {
	if len(strings.TrimSpace(cacheBase)) == 0 {
		return "", fmt.Errorf("empty git cache base")
	}

	hasher := sha256.New()
	hasher.Write([]byte(remoteURL))
	hasher.Write([]byte("|"))
	hasher.Write([]byte(ref))
	key := hex.EncodeToString(hasher.Sum(nil))

	return filepath.Join(cacheBase, key, "repo.git"), nil
}
// ensureBareRepo makes sure a directory exists at gitDir, initializing a bare git repository
// there when nothing is present.
//
// An existing directory is accepted as-is without inspecting its contents; an existing
// non-directory is an error. Missing parent directories are created with mode 0o755.
func ensureBareRepo(ctx context.Context, gitDir string) error {
	if gitDir == "" {
		return fmt.Errorf("empty gitDir")
	}

	info, err := os.Stat(gitDir)
	switch {
	case err == nil:
		if info.IsDir() {
			return nil
		}
		return fmt.Errorf("git cache repo path is not a directory: %s", gitDir)
	case !errors.Is(err, os.ErrNotExist):
		return err
	}

	if mkErr := os.MkdirAll(filepath.Dir(gitDir), 0o755); mkErr != nil {
		return mkErr
	}
	return runGitInitBare(ctx, gitDir)
}
// ensureGitRemoteURL makes the git remote named remoteName in gitDir point at remoteURL.
//
// If `git remote get-url` fails, the remote is assumed to be missing and is added. If it
// succeeds with a different URL, the URL is rewritten; if it already matches, nothing is done.
// Blank names and URLs are rejected.
func ensureGitRemoteURL(ctx context.Context, gitDir string, remoteName string, remoteURL string) error {
	switch {
	case strings.TrimSpace(remoteName) == "":
		return fmt.Errorf("empty remote name")
	case strings.TrimSpace(remoteURL) == "":
		return fmt.Errorf("empty remote url")
	}

	// The `--` in each invocation keeps remoteName from being parsed as a flag.
	current, err := runGitInDir(ctx, gitDir, "remote", "get-url", "--", remoteName)
	if err != nil {
		// Most likely the remote does not exist yet, so try to create it.
		return runGitInDirNoOutput(ctx, gitDir, "remote", "add", "--", remoteName, remoteURL)
	}

	if strings.TrimSpace(current) == remoteURL {
		return nil
	}
	return runGitInDirNoOutput(ctx, gitDir, "remote", "set-url", "--", remoteName, remoteURL)
}
// runGitInitBare runs `git init --bare` to create a bare repository at dir.
//
// It returns an error when git is not on PATH or when the command fails; in the latter case
// the error includes git's combined stdout/stderr output.
func runGitInitBare(ctx context.Context, dir string) error {
	if _, err := exec.LookPath("git"); err != nil {
		return fmt.Errorf("git not found on PATH: %w", err)
	}

	// `--` ends option parsing so that a dir beginning with '-' (dir can be derived from a
	// user-supplied URL) is always treated as a path and never as a git option.
	cmd := exec.CommandContext(ctx, "git", "init", "--bare", "--", dir) //nolint:gosec // controlled args
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("git init --bare failed: %w\noutput:\n%s", err, strings.TrimSpace(string(out)))
	}
	return nil
}
// runGitInDir runs `git --git-dir <gitDir> <args...>` and returns its combined stdout and
// stderr as a string.
//
// It returns an error when git is not on PATH or when the command fails; the failure error
// names the arguments and includes the trimmed command output.
func runGitInDir(ctx context.Context, gitDir string, args ...string) (string, error) {
	if _, lookErr := exec.LookPath("git"); lookErr != nil {
		return "", fmt.Errorf("git not found on PATH: %w", lookErr)
	}

	argv := make([]string, 0, len(args)+2)
	argv = append(argv, "--git-dir", gitDir)
	argv = append(argv, args...)

	output, runErr := exec.CommandContext(ctx, "git", argv...).CombinedOutput() //nolint:gosec // controlled args
	if runErr != nil {
		return "", fmt.Errorf("git %s failed: %w\noutput:\n%s", strings.Join(args, " "), runErr, strings.TrimSpace(string(output)))
	}
	return string(output), nil
}
// runGitInDirNoOutput runs a git command via runGitInDir and discards its output,
// returning only the error (if any).
func runGitInDirNoOutput(ctx context.Context, gitDir string, args ...string) error {
_, err := runGitInDir(ctx, gitDir, args...)
return err
}
@@ -0,0 +1,187 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package dbfactory
import (
"context"
"crypto/sha256"
"encoding/hex"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/datas"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/testutils/gitrepo"
"github.com/dolthub/dolt/go/store/types"
)
// shortTempDir returns a temporary directory that is removed when the test finishes.
//
// t.TempDir() embeds the test name in the path, which can produce very long paths on Windows.
// These tests create deep `refs/...` paths inside bare git repos and can exceed MAX_PATH when
// long path support is not enabled, so on Windows a short "dolt"-prefixed temp directory is
// created instead. Everywhere else t.TempDir() is used directly.
func shortTempDir(t *testing.T) string {
	t.Helper()

	if runtime.GOOS == "windows" {
		tmp, err := os.MkdirTemp("", "dolt")
		require.NoError(t, err)
		t.Cleanup(func() { _ = os.RemoveAll(tmp) })
		return tmp
	}
	return t.TempDir()
}
// TestGitRemoteFactory_GitFile_RequiresGitCacheRootParam checks that opening a git+file URL
// without GitCacheRootParam fails with an error that names the missing parameter.
func TestGitRemoteFactory_GitFile_RequiresGitCacheRootParam(t *testing.T) {
	noParams := map[string]interface{}{}

	_, _, _, err := CreateDB(context.Background(), types.Format_Default, "git+file:///tmp/remote.git", noParams)

	require.Error(t, err)
	require.Contains(t, err.Error(), GitCacheRootParam)
}
// TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite opens a git+file remote, verifies that the
// cache repo is created at the expected hashed location under the local repo root, then writes and commits one
// chunk and checks that refs/dolt/data resolves to a commit in the remote repository.
// The test is skipped when git is not on PATH.
func TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
localRepoRoot := shortTempDir(t)
remotePath := filepath.ToSlash(remoteRepo.GitDir)
// remoteURL is the underlying (non git+) URL; it is the value that feeds the cache directory hash below.
remoteURL := "file://" + remotePath
urlStr := "git+file://" + remotePath
params := map[string]interface{}{
GitCacheRootParam: localRepoRoot,
}
db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params)
require.NoError(t, err)
require.NotNil(t, db)
require.NotNil(t, vrw)
// Ensure cache repo created under <repoRoot>/.dolt/git-remote-cache.
cacheBase := filepath.Join(localRepoRoot, DoltDir, "git-remote-cache")
// Recompute the cache key the same way cacheRepoPath does: sha256 of "<remoteURL>|<ref>", hex encoded.
sum := sha256.Sum256([]byte(remoteURL + "|" + "refs/dolt/data"))
h := hex.EncodeToString(sum[:])
cacheRepo := filepath.Join(cacheBase, h, "repo.git")
_, err = os.Stat(filepath.Join(cacheRepo, "HEAD"))
require.NoError(t, err)
vs, ok := vrw.(*types.ValueStore)
require.True(t, ok, "expected ValueReadWriter to be *types.ValueStore, got %T", vrw)
cs := vs.ChunkStore()
// Minimal write: put one chunk and commit its hash as the root.
c := chunks.NewChunk([]byte("hello\n"))
err = cs.Put(ctx, c, func(chunks.Chunk) chunks.GetAddrsCb {
return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil }
})
require.NoError(t, err)
last, err := cs.Root(ctx)
require.NoError(t, err)
okCommit, err := cs.Commit(ctx, c.Hash(), last)
require.NoError(t, err)
require.True(t, okCommit)
require.NoError(t, db.Close())
// Remote should now have refs/dolt/data.
cmd := exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "rev-parse", "--verify", "--quiet", "refs/dolt/data^{commit}")
out, err := cmd.CombinedOutput()
require.NoError(t, err, "git rev-parse failed: %s", strings.TrimSpace(string(out)))
}
// TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip opens the same git+file remote from two separate
// cache roots and checks that each client observes the root and chunk committed by the other.
// The test is skipped when git is not on PATH.
func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
remotePath := filepath.ToSlash(remoteRepo.GitDir)
urlStr := "git+file://" + remotePath
// noopGetAddrs is a GetAddrs callback factory that reports no errors for any chunk.
noopGetAddrs := func(chunks.Chunk) chunks.GetAddrsCb {
return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil }
}
// open creates a database on the shared remote using cacheRoot as the git cache root and returns it
// together with the chunk store behind its ValueStore.
open := func(cacheRoot string) (db datas.Database, cs chunks.ChunkStore) {
params := map[string]interface{}{
GitCacheRootParam: cacheRoot,
}
d, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params)
require.NoError(t, err)
require.NotNil(t, d)
require.NotNil(t, vrw)
vs, ok := vrw.(*types.ValueStore)
require.True(t, ok, "expected ValueReadWriter to be *types.ValueStore, got %T", vrw)
return d, vs.ChunkStore()
}
cacheA := shortTempDir(t)
cacheB := shortTempDir(t)
// Client A writes a root pointing at chunk A.
dbA, csA := open(cacheA)
cA := chunks.NewChunk([]byte("clientA\n"))
require.NoError(t, csA.Put(ctx, cA, noopGetAddrs))
lastA, err := csA.Root(ctx)
require.NoError(t, err)
okCommitA, err := csA.Commit(ctx, cA.Hash(), lastA)
require.NoError(t, err)
require.True(t, okCommitA)
require.NoError(t, dbA.Close())
// Client B reads chunk A, then writes chunk B and updates the root.
dbB, csB := open(cacheB)
require.NoError(t, csB.Rebase(ctx))
rootB, err := csB.Root(ctx)
require.NoError(t, err)
require.Equal(t, cA.Hash(), rootB)
gotA, err := csB.Get(ctx, cA.Hash())
require.NoError(t, err)
require.Equal(t, "clientA\n", string(gotA.Data()))
cB := chunks.NewChunk([]byte("clientB\n"))
require.NoError(t, csB.Put(ctx, cB, noopGetAddrs))
okCommitB, err := csB.Commit(ctx, cB.Hash(), rootB)
require.NoError(t, err)
require.True(t, okCommitB)
require.NoError(t, dbB.Close())
// Client A re-opens and should see B's update.
dbA2, csA2 := open(cacheA)
require.NoError(t, csA2.Rebase(ctx))
rootA2, err := csA2.Root(ctx)
require.NoError(t, err)
require.Equal(t, cB.Hash(), rootA2)
gotB, err := csA2.Get(ctx, cB.Hash())
require.NoError(t, err)
require.Equal(t, "clientB\n", string(gotB.Data()))
require.NoError(t, dbA2.Close())
}
+19 -1
View File
@@ -184,6 +184,16 @@ func (dEnv *DoltEnv) UrlStr() string {
return dEnv.urlStr
}
// GitCacheRoot reports the directory that contains the environment's `.dolt/` directory, i.e. the
// parent of GetDoltDir(). Git remote caches are placed beneath it, under `<repoRoot>/.dolt/...`.
// It returns "" and false when GetDoltDir() is empty.
func (dEnv *DoltEnv) GitCacheRoot() (string, bool) {
	if doltDir := dEnv.GetDoltDir(); doltDir != "" {
		return filepath.Dir(doltDir), true
	}
	return "", false
}
func createRepoState(fs filesys.Filesys) (*RepoState, error) {
repoState, rsErr := LoadRepoState(fs)
@@ -530,7 +540,8 @@ var ErrCannotCreateDoltDirAlreadyExists = errors.New(".dolt dir already exists")
// * |dir|/.dolt exists and is a directory and is empty, or
// * |dir|/.dolt exists and is a directory and has only one other entry in it, a directory with name "tmp", or
// * |dir|/.dolt exists and is a directory and has only one other entry in it, a file with name "config.json", or
// * |dir|/.dolt exists and is a directory and contains both a |tmp| directory and a |config.json| file and nothing else.
// * |dir|/.dolt exists and is a directory and contains both a |tmp| directory and a |config.json| file and nothing else, or
// * |dir|/.dolt exists and is a directory and contains a |git-remote-cache| directory (and any contents under it) plus any of the above.
func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) {
absPath, err := fs.Abs(dir)
if err != nil {
@@ -550,6 +561,7 @@ func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) {
}
tmpPath := filepath.Join(doltDirPath, TmpDirName)
configPath := filepath.Join(doltDirPath, configFile)
gitRemoteCachePath := filepath.Join(doltDirPath, "git-remote-cache")
isOK := true
err := fs.Iter(doltDirPath, true, func(path string, sz int64, isDir bool) (stop bool) {
if path == doltDirPath {
@@ -558,6 +570,12 @@ func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) {
return false
} else if path == configPath && !isDir {
return false
} else if path == gitRemoteCachePath && isDir {
// Allow git remote cache contents to exist under .dolt/ when cloning / creating a DB.
return false
} else if strings.HasPrefix(path, gitRemoteCachePath+string(filepath.Separator)) {
// Allow any children of .dolt/git-remote-cache.
return false
} else {
isOK = false
return true
+23
View File
@@ -38,6 +38,29 @@ const (
workingDir = "/user/bheni/datasets/addresses"
)
// TestCanCreateDatabaseAtPathAllowsGitRemoteCache checks that a .dolt directory holding only a
// git-remote-cache directory (with contents) does not prevent creating a database at the path.
func TestCanCreateDatabaseAtPathAllowsGitRemoteCache(t *testing.T) {
	const repoDir = "/user/bheni/datasets/allow_git_remote_cache"
	var (
		dotDolt = filepath.Join(repoDir, dbfactory.DoltDir)
		cache   = filepath.Join(dotDolt, "git-remote-cache")
	)

	// Anything beneath .dolt/git-remote-cache should be ignored by CanCreateDatabaseAtPath.
	dirs := []string{
		testHomeDir,
		repoDir,
		dotDolt,
		cache,
		filepath.Join(cache, "somecache"),
	}
	fs := filesys.NewInMemFS(dirs, map[string][]byte{}, repoDir)

	canCreate, err := CanCreateDatabaseAtPath(fs, repoDir)
	require.NoError(t, err)
	require.True(t, canCreate)
}
func testHomeDirFunc() (string, error) {
return testHomeDir, nil
}
+176
View File
@@ -0,0 +1,176 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package env
import (
"fmt"
"net/url"
"path/filepath"
"strings"
)
// supportedGitPlusSchemes is the set of explicit git+* dbfactory schemes that NormalizeGitRemoteUrl accepts.
var supportedGitPlusSchemes = map[string]struct{}{
"git+file": {},
"git+http": {},
"git+https": {},
"git+ssh": {},
}
// supportedUnderlyingGitSchemes is the set of plain URL schemes that NormalizeGitRemoteUrl rewrites to the
// corresponding git+* scheme when the URL ends in .git.
var supportedUnderlyingGitSchemes = map[string]struct{}{
"file": {},
"http": {},
"https": {},
"ssh": {},
}
// NormalizeGitRemoteUrl turns a user-supplied git remote string into a canonical dbfactory URL
// with a git+* scheme.
//
// Recognized inputs:
//   - explicit dbfactory URLs: git+file/http/https/ssh://... (any other git+ scheme is an error)
//   - file/http/https/ssh URLs ending in .git
//   - scp-style ssh: [user@]host:path/repo.git (becomes git+ssh)
//   - local paths ending in .git, absolute or starting with ./ or ../ (become git+file)
//   - schemeless host/path ending in .git (becomes git+https)
//
// ok=false with a nil error means the input was not recognized as a git remote, so callers
// can fall back to their existing remote handling. Query parameters and fragments are ignored
// when checking for the .git suffix but are otherwise left in place.
func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error) {
	trimmed := strings.TrimSpace(urlArg)
	if trimmed == "" {
		return "", false, nil
	}

	// canonical round-trips a constructed URL string through url.Parse.
	canonical := func(raw string) (string, bool, error) {
		parsed, perr := url.Parse(raw)
		if perr != nil {
			return "", false, perr
		}
		return parsed.String(), true, nil
	}

	// Explicit git+* dbfactory URL: only validate the scheme.
	if strings.HasPrefix(strings.ToLower(trimmed), "git+") {
		parsed, perr := url.Parse(trimmed)
		if perr != nil {
			return "", false, perr
		}
		if _, known := supportedGitPlusSchemes[strings.ToLower(parsed.Scheme)]; !known {
			return "", false, fmt.Errorf("unsupported git dbfactory scheme %q", parsed.Scheme)
		}
		return parsed.String(), true, nil
	}

	// Everything else is translated only when it obviously names a git repo (ends in .git).
	if !strings.HasSuffix(stripQueryAndFragment(trimmed), ".git") {
		return "", false, nil
	}

	switch {
	case isScpLikeGitRemote(trimmed):
		// [user@]host:path/repo.git, with no scheme and no "://".
		host, repoPath := splitScpLike(trimmed)
		return canonical("git+ssh://" + host + "/" + strings.TrimPrefix(repoPath, "/"))

	case strings.Contains(trimmed, "://"):
		// A URL that already has a scheme; unsupported schemes are left for the caller.
		parsed, perr := url.Parse(trimmed)
		if perr != nil {
			return "", false, perr
		}
		lower := strings.ToLower(parsed.Scheme)
		if _, known := supportedUnderlyingGitSchemes[lower]; !known {
			return "", false, nil
		}
		parsed.Scheme = "git+" + lower
		return parsed.String(), true, nil

	case looksLikeLocalPath(trimmed):
		// Local filesystem path, made absolute and slash-separated.
		abs, aerr := filepath.Abs(trimmed)
		if aerr != nil {
			return "", false, aerr
		}
		return canonical("git+file://" + filepath.ToSlash(abs))

	default:
		// Schemeless host/path.git defaults to https.
		return canonical("git+https://" + trimmed)
	}
}
// stripQueryAndFragment returns |s| truncated just before its first '#' or '?', whichever appears
// first. A string containing neither character is returned unchanged.
func stripQueryAndFragment(s string) string {
	// Dropping the fragment and then the query always leaves the prefix that ends at the earliest
	// of the two markers, so a single scan for either character is sufficient.
	if cut := strings.IndexAny(s, "#?"); cut >= 0 {
		return s[:cut]
	}
	return s
}
// looksLikeLocalPath reports whether |s| should be treated as a filesystem path: either it is
// absolute according to filepath.IsAbs (whose rules depend on the target OS), or it is explicitly
// relative, starting with "./" or "../".
func looksLikeLocalPath(s string) bool {
	switch {
	case filepath.IsAbs(s):
		return true
	case strings.HasPrefix(s, "./"), strings.HasPrefix(s, "../"):
		return true
	default:
		return false
	}
}
// isScpLikeGitRemote reports whether |s| has the scp-style shape [user@]host:path. The matcher is
// deliberately simple. It requires that:
//   - |s| contains no "://" (so it has no URL scheme)
//   - |s| contains a ':'; the text before the first one is the host, the text after it the path
//   - host and path are both non-empty, and the host contains no '/'
//   - the host contains a '.' or an '@', which keeps Windows drive paths such as C:\dir from matching
//
// The caller is expected to have already checked that the path ends in .git.
func isScpLikeGitRemote(s string) bool {
	if strings.Contains(s, "://") {
		return false
	}
	host, rest, found := strings.Cut(s, ":")
	if !found || host == "" || rest == "" {
		return false
	}
	return !strings.Contains(host, "/") && strings.ContainsAny(host, ".@")
}
// splitScpLike splits |s| at its first ':' into the host (before) and the path (after). When |s|
// contains no ':', the host is empty and the whole string is returned as the path.
func splitScpLike(s string) (host string, path string) {
	before, after, found := strings.Cut(s, ":")
	if !found {
		return "", s
	}
	return before, after
}
// NOTE: we intentionally do not reject URL query parameters (including `ref=`) here.
+74
View File
@@ -0,0 +1,74 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package env
import (
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
// TestNormalizeGitRemoteUrl checks how NormalizeGitRemoteUrl classifies and rewrites each
// supported input form, and that unrecognized inputs are reported with ok=false and no error.
func TestNormalizeGitRemoteUrl(t *testing.T) {
	// Inputs whose expected output does not depend on the local filesystem. An empty |want| means
	// the input must not be recognized as a git remote.
	cases := []struct {
		name string
		in   string
		want string
	}{
		{"empty not recognized", "", ""},
		{"explicit git+https keeps scheme", "git+https://example.com/org/repo.git", "git+https://example.com/org/repo.git"},
		{"https .git becomes git+https", "https://example.com/org/repo.git", "git+https://example.com/org/repo.git"},
		{"scp-style becomes git+ssh", "git@github.com:org/repo.git", "git+ssh://git@github.com/org/repo.git"},
		{"schemeless host/path defaults to git+https", "github.com/org/repo.git", "git+https://github.com/org/repo.git"},
		{"non .git url not recognized", "https://example.com/not-git", ""},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, ok, err := NormalizeGitRemoteUrl(tc.in)
			require.NoError(t, err)
			if tc.want == "" {
				require.False(t, ok)
				require.Empty(t, got)
				return
			}
			require.True(t, ok)
			require.Equal(t, tc.want, got)
		})
	}

	// The local-path case needs a real absolute path, so it is built from a per-test temp dir.
	t.Run("local absolute path becomes git+file", func(t *testing.T) {
		localPath := filepath.ToSlash(filepath.Join(t.TempDir(), "remote.git"))
		got, ok, err := NormalizeGitRemoteUrl(localPath)
		require.NoError(t, err)
		require.True(t, ok)
		require.Equal(t, "git+file://"+localPath, got)
	})
}
+30
View File
@@ -104,6 +104,14 @@ func (r *Remote) GetRemoteDB(ctx context.Context, nbf *types.NomsBinFormat, dial
}
params[dbfactory.GRPCDialProviderParam] = dialer
if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") {
params[dbfactory.GitRemoteNameParam] = r.Name
if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok {
if root, ok := p.GitCacheRoot(); ok {
params[dbfactory.GitCacheRootParam] = root
}
}
}
return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params)
}
@@ -117,6 +125,14 @@ func (r *Remote) Prepare(ctx context.Context, nbf *types.NomsBinFormat, dialer d
}
params[dbfactory.GRPCDialProviderParam] = dialer
if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") {
params[dbfactory.GitRemoteNameParam] = r.Name
if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok {
if root, ok := p.GitCacheRoot(); ok {
params[dbfactory.GitCacheRootParam] = root
}
}
}
return dbfactory.PrepareDB(ctx, nbf, r.Url, params)
}
@@ -128,6 +144,14 @@ func (r *Remote) GetRemoteDBWithoutCaching(ctx context.Context, nbf *types.NomsB
}
params[dbfactory.NoCachingParameter] = "true"
params[dbfactory.GRPCDialProviderParam] = dialer
if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") {
params[dbfactory.GitRemoteNameParam] = r.Name
if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok {
if root, ok := p.GitCacheRoot(); ok {
params[dbfactory.GitCacheRootParam] = root
}
}
}
return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params)
}
@@ -643,6 +667,12 @@ func NewPullSpec[C doltdb.Context](
}
func GetAbsRemoteUrl(fs filesys2.Filesys, cfg config.ReadableConfig, urlArg string) (string, string, error) {
if normalized, ok, nerr := NormalizeGitRemoteUrl(urlArg); nerr != nil {
return "", "", nerr
} else if ok {
urlArg = normalized
}
u, err := earl.Parse(urlArg)
if err != nil {
return "", "", err
@@ -77,6 +77,18 @@ type DoltDatabaseProvider struct {
InitDatabaseHooks []InitDatabaseHook
}
// remoteDialerWithGitCacheRoot decorates a GRPCDialProvider with a root path that it reports
// through GitCacheRoot. All dialing behavior comes from the embedded provider.
type remoteDialerWithGitCacheRoot struct {
	dbfactory.GRPCDialProvider
	root string
}

// GitCacheRoot returns the configured root and true. When the root is empty or consists only of
// whitespace it returns "" and false.
func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) {
	if root := d.root; strings.TrimSpace(root) != "" {
		return root, true
	}
	return "", false
}
var _ sql.DatabaseProvider = (*DoltDatabaseProvider)(nil)
var _ sql.FunctionProvider = (*DoltDatabaseProvider)(nil)
var _ sql.MutableDatabaseProvider = (*DoltDatabaseProvider)(nil)
@@ -502,10 +514,26 @@ func (p *DoltDatabaseProvider) allRevisionDbs(ctx *sql.Context, db dsess.SqlData
}
func (p *DoltDatabaseProvider) GetRemoteDB(ctx context.Context, format *types.NomsBinFormat, r env.Remote, withCaching bool) (*doltdb.DoltDB, error) {
if withCaching {
return r.GetRemoteDB(ctx, format, p.remoteDialer)
// For git remotes, thread through the initiating database's repo root so git caches can be located under
// `<repoRoot>/.dolt/...` instead of a user-global cache dir.
dialer := p.remoteDialer
if sqlCtx, ok := ctx.(*sql.Context); ok {
baseName, _ := doltdb.SplitRevisionDbName(sqlCtx.GetCurrentDatabase())
dbKey := strings.ToLower(baseName)
p.mu.RLock()
dbLoc, ok := p.dbLocations[dbKey]
p.mu.RUnlock()
if ok && dbLoc != nil {
if root, err := dbLoc.Abs("."); err == nil && strings.TrimSpace(root) != "" {
dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: root}
}
}
}
return r.GetRemoteDBWithoutCaching(ctx, format, p.remoteDialer)
if withCaching {
return r.GetRemoteDB(ctx, format, dialer)
}
return r.GetRemoteDBWithoutCaching(ctx, format, dialer)
}
func (p *DoltDatabaseProvider) CreateDatabase(ctx *sql.Context, name string) error {
@@ -814,7 +842,11 @@ func (p *DoltDatabaseProvider) cloneDatabaseFromRemote(
}
r := env.NewRemote(remoteName, remoteUrl, remoteParams)
srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, p.remoteDialer)
destRoot, err := p.fs.Abs(dbName)
if err != nil {
return err
}
srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: destRoot})
if err != nil {
return err
}
@@ -307,6 +307,14 @@ func syncRemote(ctx *sql.Context, dbData env.DbData[*sql.Context], dsess *dsess.
// not AWS, it verifies that no AWS parameters are present in |apr|.
func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (map[string]string, error) {
params := map[string]string{}
isGitRemote := urlScheme == dbfactory.GitFileScheme || urlScheme == dbfactory.GitHTTPScheme || urlScheme == dbfactory.GitHTTPSScheme || urlScheme == dbfactory.GitSSHScheme
if !isGitRemote {
if _, ok := apr.GetValue("ref"); ok {
return nil, fmt.Errorf("error: --ref is only supported for git remotes")
}
}
var err error
switch urlScheme {
case dbfactory.AWSScheme:
@@ -315,6 +323,15 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma
// TODO(elianddb): This func mainly interfaces with apr to set the OSS key-vals in params, but the backup arg
// parser does not include any OSS-related flags? I'm guessing they must be processed elsewhere?
err = cli.AddOSSParams(url, apr, params)
case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme:
err = cli.VerifyNoAwsParams(apr)
if ref, ok := apr.GetValue("ref"); ok {
ref = strings.TrimSpace(ref)
if ref == "" {
return nil, fmt.Errorf("error: --ref cannot be empty")
}
params[dbfactory.GitRefParam] = ref
}
default:
err = cli.VerifyNoAwsParams(apr)
}
@@ -16,6 +16,7 @@ package dprocedures
import (
"path"
"strings"
"github.com/dolthub/go-mysql-server/sql"
@@ -45,7 +46,7 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) {
}
sess := dsess.DSessFromSess(ctx.Session)
_, remoteUrl, err := env.GetAbsRemoteUrl(sess.Provider().FileSystem(), emptyConfig(), urlStr)
scheme, remoteUrl, err := env.GetAbsRemoteUrl(sess.Provider().FileSystem(), emptyConfig(), urlStr)
if err != nil {
return nil, errhand.BuildDError("error: '%s' is not valid.", urlStr).Build()
}
@@ -57,6 +58,19 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) {
remoteParms[dbfactory.GRPCUsernameAuthParam] = user
}
isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme
if ref, ok := apr.GetValue("ref"); ok {
ref = strings.TrimSpace(ref)
if ref == "" {
return nil, errhand.BuildDError("error: --ref cannot be empty").Build()
}
if !isGitRemote {
return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build()
}
remoteParms[dbfactory.GitRefParam] = ref
}
depth, ok := apr.GetInt(cli.DepthFlag)
if !ok {
depth = -1
@@ -82,7 +96,11 @@ func getDirectoryAndUrlString(apr *argparser.ArgParseResults) (string, string, e
urlStr := apr.Arg(0)
_, err := earl.Parse(urlStr)
if err != nil {
return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build()
if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok {
urlStr = normalized
} else {
return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build()
}
}
var dir string
@@ -22,6 +22,7 @@ import (
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
@@ -97,12 +98,27 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr
return err
}
_, absRemoteUrl, err := env.GetAbsRemoteUrl(dbFs, &config.MapConfig{}, remoteUrl)
scheme, absRemoteUrl, err := env.GetAbsRemoteUrl(dbFs, &config.MapConfig{}, remoteUrl)
if err != nil {
return err
}
r := env.NewRemote(remoteName, absRemoteUrl, map[string]string{})
params := map[string]string{}
isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme
if ref, ok := apr.GetValue("ref"); ok {
ref = strings.TrimSpace(ref)
if ref == "" {
return fmt.Errorf("error: --ref cannot be empty")
}
if !isGitRemote {
return fmt.Errorf("error: --ref is only supported for git remotes")
}
params[dbfactory.GitRefParam] = ref
}
r := env.NewRemote(remoteName, absRemoteUrl, params)
return dbd.Rsw.AddRemote(r)
}
+204
View File
@@ -0,0 +1,204 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# Per-test setup: applies the suite's skip rules, runs the shared setup from helper/common.bash,
# then moves into this run's repo directory and creates the directory used for clones.
setup() {
    skiponwindows "tests are flaky on Windows"
    skip_if_remote
    setup_common

    # Every test in this file shells out to git to create and inspect the bare remote.
    if ! command -v git >/dev/null 2>&1; then
        skip "git not installed"
    fi

    # Quote the expansions so a temp dir containing spaces or glob characters is not word-split.
    # NOTE(review): presumably setup_common creates dolt-repo-$$ under BATS_TMPDIR — confirm in helper/common.bash.
    cd "$BATS_TMPDIR"
    cd "dolt-repo-$$"
    mkdir "dolt-repo-clones"
}
# Per-test teardown. Both calls are helpers from helper/common.bash (loaded at the top of this
# file); their bodies are not visible here.
teardown() {
assert_feature_version
teardown_common
}
# Round trip through a bare git repository used as a dolt remote:
# repo1 pushes, repo2 clones and pushes a new row back, repo1 pulls and sees the row.
@test "remotes-git: smoke push/clone/push-back/pull" {
# Bare git repository that acts as the remote.
mkdir remote.git
git init --bare remote.git
# repo1: one committed table, pushed to the remote (given as a relative path ending in .git).
mkdir repo1
cd repo1
dolt init
dolt sql -q "create table test(pk int primary key, v int);"
dolt add .
dolt commit -m "create table"
dolt remote add origin ../remote.git
run dolt push --set-upstream origin main
[ "$status" -eq 0 ]
# repo2: clone from the same remote, add a row, push it back.
cd ..
cd dolt-repo-clones
run dolt clone ../remote.git repo2
[ "$status" -eq 0 ]
cd repo2
dolt sql -q "insert into test values (1, 10);"
dolt add .
dolt commit -m "add row"
run dolt push origin main
[ "$status" -eq 0 ]
# Back in repo1: pulling must bring in the row written by repo2.
cd ../../repo1
run dolt pull
[ "$status" -eq 0 ]
run dolt sql -q "select v from test where pk = 1;" -r csv
[ "$status" -eq 0 ]
# `|| false` turns a non-matching [[ ]] into an explicit failing status for the test.
[[ "$output" =~ "10" ]] || false
}
# Pushing to a freshly initialised bare git repository must create the refs/dolt/data ref.
@test "remotes-git: empty remote bootstrap creates refs/dolt/data" {
mkdir remote.git
git init --bare remote.git
# Assert the dolt data ref doesn't exist yet.
run git --git-dir remote.git show-ref refs/dolt/data
[ "$status" -eq 1 ]
# Create a repo with one commit and push it through the git remote.
mkdir repo1
cd repo1
dolt init
dolt sql -q "create table test(pk int primary key);"
dolt add .
dolt commit -m "create table"
dolt remote add origin ../remote.git
run dolt push --set-upstream origin main
[ "$status" -eq 0 ]
# After the push the ref must be present in the bare repository.
run git --git-dir ../remote.git show-ref refs/dolt/data
[ "$status" -eq 0 ]
}
# A branch pushed to the git remote after a clone was taken must show up in that clone's
# branch listing once the clone runs `dolt pull`.
@test "remotes-git: pull also fetches branches from git remote" {
mkdir remote.git
git init --bare remote.git
# repo1 publishes only main.
mkdir repo1
cd repo1
dolt init
dolt remote add origin ../remote.git
dolt push origin main
# repo2 is cloned while the remote has main but no "other" branch.
cd ..
cd dolt-repo-clones
run dolt clone ../remote.git repo2
[ "$status" -eq 0 ]
cd repo2
run dolt branch -va
[[ "$output" =~ "main" ]] || false
[[ ! "$output" =~ "other" ]] || false
# repo1 creates and pushes the new branch.
cd ../../repo1
dolt checkout -b other
dolt commit --allow-empty -m "first commit on other"
dolt push origin other
# After pulling, repo2 must list both branches.
cd ../dolt-repo-clones/repo2
dolt pull
run dolt branch -va
[[ "$output" =~ "main" ]] || false
[[ "$output" =~ "other" ]] || false
}
# `dolt pull` on main must leave main's head unchanged when only another branch has commits,
# while the other branch remains available to check out with tracking set up.
@test "remotes-git: pull fetches but does not merge other branches" {
mkdir remote.git
git init --bare remote.git
# repo1 publishes main plus a second branch that carries one extra commit.
mkdir repo1
cd repo1
dolt init
dolt remote add origin ../remote.git
dolt push --set-upstream origin main
dolt checkout -b other
dolt commit --allow-empty -m "first commit on other"
dolt push --set-upstream origin other
cd ..
cd dolt-repo-clones
run dolt clone ../remote.git repo2
[ "$status" -eq 0 ]
cd repo2
# Record the head commit before and after the pull; they must be identical.
# get_head_commit is a helper defined outside this file.
main_state1=$(get_head_commit)
run dolt pull
[ "$status" -eq 0 ]
main_state2=$(get_head_commit)
[[ "$main_state1" = "$main_state2" ]] || false
run dolt branch -va
[[ "$output" =~ "main" ]] || false
[[ "$output" =~ "other" ]] || false
# Checking out the other branch must set up tracking and expose its commit.
run dolt checkout other
[ "$status" -eq 0 ]
[[ "$output" =~ "branch 'other' set up to track 'origin/other'." ]] || false
run dolt log --oneline -n 1
[ "$status" -eq 0 ]
[[ "$output" =~ "first commit on other" ]] || false
}
# With `--ref refs/dolt/custom`, push must write to that ref instead of the default
# refs/dolt/data, and a clone given the same --ref must read the data back.
@test "remotes-git: custom --ref writes to configured dolt data ref" {
mkdir remote.git
git init --bare remote.git
mkdir repo1
cd repo1
dolt init
dolt sql -q "create table test(pk int primary key, v int);"
dolt sql -q "insert into test values (1, 111);"
dolt add .
dolt commit -m "seed"
# Register the remote with a non-default data ref, then push.
dolt remote add --ref refs/dolt/custom origin ../remote.git
run dolt push --set-upstream origin main
[ "$status" -eq 0 ]
# The custom ref must exist and the default ref must not have been created.
run git --git-dir ../remote.git show-ref refs/dolt/custom
[ "$status" -eq 0 ]
run git --git-dir ../remote.git show-ref refs/dolt/data
[ "$status" -ne 0 ]
# Clone using the same custom ref and verify the seeded row is present.
cd ..
cd dolt-repo-clones
run dolt clone --ref refs/dolt/custom ../remote.git repo2
[ "$status" -eq 0 ]
cd repo2
run dolt sql -q "select v from test where pk = 1;" -r csv
[ "$status" -eq 0 ]
[[ "$output" =~ "111" ]] || false
# Cloning must not have created the default ref either.
run git --git-dir ../../remote.git show-ref refs/dolt/data
[ "$status" -ne 0 ]
}
# Pushes a single empty commit to a bare git remote and checks that the push succeeds.
# NOTE(review): despite the title, nothing here asserts that a cache directory is created under
# .dolt/ — only the push exit status is checked. Consider asserting on the cache location.
@test "remotes-git: push works with per-repo git cache under .dolt/" {
mkdir remote.git
git init --bare remote.git
mkdir repo1
cd repo1
dolt init
dolt commit --allow-empty -m "init"
dolt remote add origin ../remote.git
run dolt push --set-upstream origin main
[ "$status" -eq 0 ]
}
@@ -0,0 +1,98 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# Per-test setup: applies the suite's skip rules, runs the shared setup from helper/common.bash,
# then moves into this run's repo directory.
setup() {
    skiponwindows "tests are flaky on Windows"
    skip_if_remote
    setup_common

    # Every test in this file shells out to git to create and inspect the bare remote.
    if ! command -v git >/dev/null 2>&1; then
        skip "git not installed"
    fi

    # Quote the expansions so a temp dir containing spaces or glob characters is not word-split.
    # NOTE(review): presumably setup_common creates dolt-repo-$$ under BATS_TMPDIR — confirm in helper/common.bash.
    cd "$BATS_TMPDIR"
    cd "dolt-repo-$$"
}
# Per-test teardown. Both calls are helpers from helper/common.bash (loaded at the top of this
# file); their bodies are not visible here.
teardown() {
assert_feature_version
teardown_common
}
# The SQL procedure dolt_remote('add', '--ref', ...) must store the custom data ref so that a
# following dolt_push writes to refs/dolt/custom and not to the default refs/dolt/data.
@test "sql-remotes-git: dolt_remote add supports --ref for git remotes" {
mkdir remote.git
git init --bare remote.git
mkdir repo1
cd repo1
dolt init
dolt sql -q "create table test(pk int primary key, v int);"
dolt sql -q "insert into test values (1, 111);"
dolt add .
dolt commit -m "seed"
# Add the remote and push in a single SQL session; the heredoc body is sent to dolt sql verbatim.
run dolt sql <<SQL
CALL dolt_remote('add', '--ref', 'refs/dolt/custom', 'origin', '../remote.git');
CALL dolt_push('origin', 'main');
SQL
[ "$status" -eq 0 ]
# The custom ref must exist and the default ref must not have been created.
run git --git-dir ../remote.git show-ref refs/dolt/custom
[ "$status" -eq 0 ]
run git --git-dir ../remote.git show-ref refs/dolt/data
[ "$status" -ne 0 ]
}
# The SQL procedure dolt_clone('--ref', ...) must read from the custom data ref that an earlier
# CLI push wrote, without creating the default refs/dolt/data ref.
@test "sql-remotes-git: dolt_clone supports --ref for git remotes" {
mkdir remote.git
git init --bare remote.git
# Seed the remote from repo1 through the CLI, using the custom ref.
mkdir repo1
cd repo1
dolt init
dolt sql -q "create table test(pk int primary key, v int);"
dolt sql -q "insert into test values (1, 111);"
dolt add .
dolt commit -m "seed"
dolt remote add --ref refs/dolt/custom origin ../remote.git
dolt push --set-upstream origin main
# "host" is a separate dolt repo whose SQL session issues the clone; repo2 is created inside it.
cd ..
mkdir host
cd host
dolt init
run dolt sql -q "call dolt_clone('--ref', 'refs/dolt/custom', '../remote.git', 'repo2');"
[ "$status" -eq 0 ]
# The cloned database must contain the seeded row.
cd repo2
run dolt sql -q "select v from test where pk = 1;" -r csv
[ "$status" -eq 0 ]
[[ "$output" =~ "111" ]] || false
# Only the custom ref may exist in the bare repository.
run git --git-dir ../../remote.git show-ref refs/dolt/custom
[ "$status" -eq 0 ]
run git --git-dir ../../remote.git show-ref refs/dolt/data
[ "$status" -ne 0 ]
}
# The SQL procedure dolt_backup('sync-url', '--ref', ...) must write the backup to the custom
# data ref of the git remote and leave the default refs/dolt/data ref absent.
@test "sql-remotes-git: dolt_backup sync-url supports --ref for git remotes" {
mkdir remote.git
git init --bare remote.git
mkdir repo1
cd repo1
dolt init
dolt sql -q "create table test(pk int primary key, v int);"
dolt sql -q "insert into test values (1, 111);"
dolt add .
dolt commit -m "seed"
# Back up directly to the URL; no named remote or backup is registered first.
run dolt sql -q "call dolt_backup('sync-url', '--ref', 'refs/dolt/custom', '../remote.git');"
[ "$status" -eq 0 ]
# The custom ref must exist and the default ref must not have been created.
run git --git-dir ../remote.git show-ref refs/dolt/custom
[ "$status" -eq 0 ]
run git --git-dir ../remote.git show-ref refs/dolt/data
[ "$status" -ne 0 ]
}