/{go, integration-tests}: fix repo cache

This commit is contained in:
coffeegoddd☕️✨
2026-02-11 11:45:44 -08:00
parent ae5544d9a1
commit e64f8d82e4
8 changed files with 140 additions and 32 deletions

View File

@@ -49,6 +49,18 @@ This default configuration is achieved by creating references to the remote bran
},
}
// remoteDialerWithGitCacheRoot decorates a dbfactory.GRPCDialProvider with a root
// directory that it reports through GitCacheRoot. All dialing behavior comes from
// the embedded provider unchanged.
type remoteDialerWithGitCacheRoot struct {
	dbfactory.GRPCDialProvider
	// root is the directory reported by GitCacheRoot. createRemote fills it with
	// the clone destination path it was handed.
	root string
}

// Compile-time check that the wrapper satisfies dbfactory.GitCacheRootProvider.
// The interface is discovered through a dynamic type assertion on the dialer, so
// without this a signature drift would only show up at runtime as a missing
// cache root.
var _ dbfactory.GitCacheRootProvider = remoteDialerWithGitCacheRoot{}

// GitCacheRoot returns the configured root and true. It returns "" and false when
// the root is empty or consists only of whitespace. A non-blank root is returned
// exactly as stored (it is not trimmed).
func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) {
	if strings.TrimSpace(d.root) == "" {
		return "", false
	}
	return d.root, true
}
type CloneCmd struct{}
// Name returns the name of the Dolt cli command. This is what is used on the command line to invoke the command
@@ -130,7 +142,11 @@ func clone(ctx context.Context, apr *argparser.ArgParseResults, dEnv *env.DoltEn
var r env.Remote
var srcDB *doltdb.DoltDB
r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv)
cloneRoot, err := dEnv.FS.Abs(dir)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv, cloneRoot)
if verr != nil {
return verr
}
@@ -211,11 +227,15 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE
return dir, urlStr, nil
}
func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) {
func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv, cloneRoot string) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) {
cli.Printf("cloning %s\n", remoteUrl)
r := env.NewRemote(remoteName, remoteUrl, params)
ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dEnv)
dialer := dbfactory.GRPCDialProvider(dEnv)
if strings.TrimSpace(cloneRoot) != "" {
dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: dEnv, root: cloneRoot}
}
ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dialer)
if err != nil {
bdr := errhand.BuildDError("error: failed to get remote db").AddCause(err)
return env.NoRemote, nil, bdr.Build()

View File

@@ -34,12 +34,21 @@ import (
)
const (
// GitCacheRootParam is the params key holding the path to the local Dolt repository root (the directory that
// contains `.dolt/`). Callers are expected to pass an absolute path; resolveGitCacheRoot only checks that the
// value is a non-blank string. GitRemoteFactory.CreateDB requires this param for git remotes: it returns an error
// when the param is absent, and otherwise keeps its cache under `<root>/.dolt/git-remote-cache`.
GitCacheRootParam = "git_cache_root"
// GitRefParam is the params key "git_ref".
// NOTE(review): presumably selects the git ref that stores the remote's data, falling back to defaultGitRef;
// the code that reads it is outside this view - confirm.
GitRefParam = "git_ref"
// GitRemoteNameParam is the params key carrying the remote's name for git remotes. When it does not supply a
// usable name, resolveGitRemoteName falls back to defaultGitRemoteName.
GitRemoteNameParam = "git_remote_name"
// defaultGitRef is the git ref used when none is specified.
// NOTE(review): the fallback logic is not visible here - confirm.
defaultGitRef = "refs/dolt/data"
// defaultGitRemoteName is the git remote name returned by resolveGitRemoteName as its fallback.
defaultGitRemoteName = "origin"
)
// GitCacheRootProvider provides the local Dolt repo root for per-repo git remote caches.
// Implementations should return ok=false when no repo root is available.
type GitCacheRootProvider interface {
// GitCacheRoot returns the repository root directory (the directory that contains `.dolt/`) and true,
// or "" and false when no root is available.
GitCacheRoot() (string, bool)
}
// GitRemoteFactory opens a Dolt database backed by a Git remote, using a local bare
// repository as an object cache and remote configuration store.
//
@@ -83,10 +92,14 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor
return nil, nil, nil, err
}
cacheBase, err := defaultGitCacheBase()
cacheRoot, ok, err := resolveGitCacheRoot(params)
if err != nil {
return nil, nil, nil, err
}
if !ok {
return nil, nil, nil, fmt.Errorf("%s is required for git remotes", GitCacheRootParam)
}
cacheBase := filepath.Join(cacheRoot, DoltDir, "git-remote-cache")
cacheRepo, err := cacheRepoPath(cacheBase, remoteURL.String(), ref)
if err != nil {
@@ -166,12 +179,24 @@ func resolveGitRemoteName(params map[string]interface{}) string {
return defaultGitRemoteName
}
func defaultGitCacheBase() (string, error) {
base, err := os.UserCacheDir()
if err != nil {
return "", err
// resolveGitCacheRoot parses and validates the optional GitCacheRootParam.
// It returns ok=false when the param is not present.
func resolveGitCacheRoot(params map[string]interface{}) (root string, ok bool, err error) {
if params == nil {
return "", false, nil
}
return filepath.Join(base, "dolt", "git-remote-cache"), nil
v, ok := params[GitCacheRootParam]
if !ok || v == nil {
return "", false, nil
}
s, ok := v.(string)
if !ok {
return "", false, fmt.Errorf("%s must be a string", GitCacheRootParam)
}
if strings.TrimSpace(s) == "" {
return "", false, fmt.Errorf("%s cannot be empty", GitCacheRootParam)
}
return s, true, nil
}
func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) {

View File

@@ -49,7 +49,14 @@ func shortTempDir(t *testing.T) string {
return dir
}
func TestGitRemoteFactory_GitFile_UsesDefaultCacheDirAndCanWrite(t *testing.T) {
// TestGitRemoteFactory_GitFile_RequiresGitCacheRootParam checks that CreateDB fails
// for a git+file URL when the params map has no GitCacheRootParam entry, and that
// the resulting error text contains the parameter's name.
func TestGitRemoteFactory_GitFile_RequiresGitCacheRootParam(t *testing.T) {
	noCacheRoot := map[string]interface{}{}

	_, _, _, createErr := CreateDB(context.Background(), types.Format_Default, "git+file:///tmp/remote.git", noCacheRoot)

	require.Error(t, createErr)
	require.Contains(t, createErr.Error(), GitCacheRootParam)
}
func TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}
@@ -58,20 +65,22 @@ func TestGitRemoteFactory_GitFile_UsesDefaultCacheDirAndCanWrite(t *testing.T) {
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
localRepoRoot := shortTempDir(t)
remotePath := filepath.ToSlash(remoteRepo.GitDir)
remoteURL := "file://" + remotePath
urlStr := "git+file://" + remotePath
params := map[string]interface{}{}
params := map[string]interface{}{
GitCacheRootParam: localRepoRoot,
}
db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params)
require.NoError(t, err)
require.NotNil(t, db)
require.NotNil(t, vrw)
// Ensure cache repo created under default cache dir.
base, err := os.UserCacheDir()
require.NoError(t, err)
cacheBase := filepath.Join(base, "dolt", "git-remote-cache")
// Ensure cache repo created under <repoRoot>/.dolt/git-remote-cache.
cacheBase := filepath.Join(localRepoRoot, DoltDir, "git-remote-cache")
sum := sha256.Sum256([]byte(remoteURL + "|" + "refs/dolt/data"))
h := hex.EncodeToString(sum[:])
@@ -120,8 +129,10 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil }
}
open := func() (db datas.Database, cs chunks.ChunkStore) {
params := map[string]interface{}{}
open := func(cacheRoot string) (db datas.Database, cs chunks.ChunkStore) {
params := map[string]interface{}{
GitCacheRootParam: cacheRoot,
}
d, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params)
require.NoError(t, err)
require.NotNil(t, d)
@@ -132,8 +143,11 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
return d, vs.ChunkStore()
}
cacheA := shortTempDir(t)
cacheB := shortTempDir(t)
// Client A writes a root pointing at chunk A.
dbA, csA := open()
dbA, csA := open(cacheA)
cA := chunks.NewChunk([]byte("clientA\n"))
require.NoError(t, csA.Put(ctx, cA, noopGetAddrs))
lastA, err := csA.Root(ctx)
@@ -144,7 +158,7 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
require.NoError(t, dbA.Close())
// Client B reads chunk A, then writes chunk B and updates the root.
dbB, csB := open()
dbB, csB := open(cacheB)
require.NoError(t, csB.Rebase(ctx))
rootB, err := csB.Root(ctx)
require.NoError(t, err)
@@ -161,7 +175,7 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
require.NoError(t, dbB.Close())
// Client A re-opens and should see B's update.
dbA2, csA2 := open()
dbA2, csA2 := open(cacheA)
require.NoError(t, csA2.Rebase(ctx))
rootA2, err := csA2.Root(ctx)
require.NoError(t, err)

View File

@@ -184,6 +184,16 @@ func (dEnv *DoltEnv) UrlStr() string {
return dEnv.urlStr
}
// GitCacheRoot reports the local Dolt repository root, i.e. the parent directory of
// the path returned by GetDoltDir (the directory that contains `.dolt/`). It is used
// to place git-remote caches under `<repoRoot>/.dolt/...`.
//
// The second result is false, and the path empty, when GetDoltDir returns "".
// NOTE(review): the result is absolute only if GetDoltDir returns an absolute
// path; GetDoltDir is not visible here - confirm.
func (dEnv *DoltEnv) GitCacheRoot() (string, bool) {
	if doltDir := dEnv.GetDoltDir(); doltDir != "" {
		return filepath.Dir(doltDir), true
	}
	return "", false
}
func createRepoState(fs filesys.Filesys) (*RepoState, error) {
repoState, rsErr := LoadRepoState(fs)

View File

@@ -106,6 +106,11 @@ func (r *Remote) GetRemoteDB(ctx context.Context, nbf *types.NomsBinFormat, dial
params[dbfactory.GRPCDialProviderParam] = dialer
if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") {
params[dbfactory.GitRemoteNameParam] = r.Name
if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok {
if root, ok := p.GitCacheRoot(); ok {
params[dbfactory.GitCacheRootParam] = root
}
}
}
return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params)
@@ -122,6 +127,11 @@ func (r *Remote) Prepare(ctx context.Context, nbf *types.NomsBinFormat, dialer d
params[dbfactory.GRPCDialProviderParam] = dialer
if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") {
params[dbfactory.GitRemoteNameParam] = r.Name
if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok {
if root, ok := p.GitCacheRoot(); ok {
params[dbfactory.GitCacheRootParam] = root
}
}
}
return dbfactory.PrepareDB(ctx, nbf, r.Url, params)
@@ -136,6 +146,11 @@ func (r *Remote) GetRemoteDBWithoutCaching(ctx context.Context, nbf *types.NomsB
params[dbfactory.GRPCDialProviderParam] = dialer
if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") {
params[dbfactory.GitRemoteNameParam] = r.Name
if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok {
if root, ok := p.GitCacheRoot(); ok {
params[dbfactory.GitCacheRootParam] = root
}
}
}
return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params)

View File

@@ -77,6 +77,18 @@ type DoltDatabaseProvider struct {
InitDatabaseHooks []InitDatabaseHook
}
// remoteDialerWithGitCacheRoot decorates a dbfactory.GRPCDialProvider with a root
// directory that it reports through GitCacheRoot. All dialing behavior comes from
// the embedded provider unchanged.
type remoteDialerWithGitCacheRoot struct {
	dbfactory.GRPCDialProvider
	// root is the directory reported by GitCacheRoot. The provider fills it with
	// an absolute path resolved from a database location or clone destination.
	root string
}

// Compile-time check that the wrapper satisfies dbfactory.GitCacheRootProvider.
// The interface is discovered through a dynamic type assertion on the dialer, so
// without this a signature drift would only show up at runtime as a missing
// cache root.
var _ dbfactory.GitCacheRootProvider = remoteDialerWithGitCacheRoot{}

// GitCacheRoot returns the configured root and true. It returns "" and false when
// the root is empty or consists only of whitespace. A non-blank root is returned
// exactly as stored (it is not trimmed).
func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) {
	if strings.TrimSpace(d.root) == "" {
		return "", false
	}
	return d.root, true
}
var _ sql.DatabaseProvider = (*DoltDatabaseProvider)(nil)
var _ sql.FunctionProvider = (*DoltDatabaseProvider)(nil)
var _ sql.MutableDatabaseProvider = (*DoltDatabaseProvider)(nil)
@@ -502,10 +514,26 @@ func (p *DoltDatabaseProvider) allRevisionDbs(ctx *sql.Context, db dsess.SqlData
}
func (p *DoltDatabaseProvider) GetRemoteDB(ctx context.Context, format *types.NomsBinFormat, r env.Remote, withCaching bool) (*doltdb.DoltDB, error) {
if withCaching {
return r.GetRemoteDB(ctx, format, p.remoteDialer)
// For git remotes, thread through the initiating database's repo root so git caches can be located under
// `<repoRoot>/.dolt/...` instead of a user-global cache dir.
dialer := p.remoteDialer
if sqlCtx, ok := ctx.(*sql.Context); ok {
baseName, _ := doltdb.SplitRevisionDbName(sqlCtx.GetCurrentDatabase())
dbKey := strings.ToLower(baseName)
p.mu.RLock()
dbLoc, ok := p.dbLocations[dbKey]
p.mu.RUnlock()
if ok && dbLoc != nil {
if root, err := dbLoc.Abs("."); err == nil && strings.TrimSpace(root) != "" {
dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: root}
}
}
}
return r.GetRemoteDBWithoutCaching(ctx, format, p.remoteDialer)
if withCaching {
return r.GetRemoteDB(ctx, format, dialer)
}
return r.GetRemoteDBWithoutCaching(ctx, format, dialer)
}
func (p *DoltDatabaseProvider) CreateDatabase(ctx *sql.Context, name string) error {
@@ -814,7 +842,11 @@ func (p *DoltDatabaseProvider) cloneDatabaseFromRemote(
}
r := env.NewRemote(remoteName, remoteUrl, remoteParams)
srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, p.remoteDialer)
destRoot, err := p.fs.Abs(dbName)
if err != nil {
return err
}
srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: destRoot})
if err != nil {
return err
}

View File

@@ -8,15 +8,11 @@ setup() {
cd $BATS_TMPDIR
cd dolt-repo-$$
mkdir "dolt-repo-clones"
# Keep auto-selected git cache dir inside this test's sandbox.
export XDG_CACHE_HOME="$(mktemp -d)"
}
teardown() {
assert_feature_version
teardown_common
rm -rf "$XDG_CACHE_HOME"
}
@test "remotes-git: smoke push/clone/push-back/pull" {

View File

@@ -7,15 +7,11 @@ setup() {
setup_common
cd $BATS_TMPDIR
cd dolt-repo-$$
# Keep auto-selected git cache dir inside this test's sandbox.
export XDG_CACHE_HOME="$(mktemp -d)"
}
teardown() {
assert_feature_version
teardown_common
rm -rf "$XDG_CACHE_HOME"
}
@test "sql-remotes-git: dolt_remote add supports --ref for git remotes" {