From 3a5f52f62f6e80bea706d7e722ce79133ec79573 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Sun, 11 Feb 2024 12:29:57 -0800 Subject: [PATCH] clone.go refactor to make sane --- go/libraries/doltcore/env/actions/clone.go | 238 ++++++++++----------- integration-tests/bats/remotes.bats | 4 +- 2 files changed, 116 insertions(+), 126 deletions(-) diff --git a/go/libraries/doltcore/env/actions/clone.go b/go/libraries/doltcore/env/actions/clone.go index b3e51ff0e0..7366887ee1 100644 --- a/go/libraries/doltcore/env/actions/clone.go +++ b/go/libraries/doltcore/env/actions/clone.go @@ -162,22 +162,24 @@ func sortedKeys(m map[string]iohelp.ReadStats) []string { // // The `branch` parameter is the branch to clone. If it is empty, the default branch is used. func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, depth int, dEnv *env.DoltEnv) error { + // We support two forms of cloning: full and shallow. These two approaches have little in common, with the exception + // of the first and last steps. Determining the branch to check out and setting the working set to the checked out commit. + + srcRefHashes, branch, err := getSrcRefs(ctx, branch, srcDB, dEnv) + if err != nil { + return fmt.Errorf("%w; %s", ErrFailedToGetBranch, err.Error()) + } + if remoteName == "" { + remoteName = "origin" + } + + var checkedOutCommit *doltdb.Commit // Step 1) Pull the remote information we care about to a local disk. - var err error if depth <= 0 { - err = fullClone(ctx, srcDB, remoteName, branch, singleBranch, dEnv) + checkedOutCommit, err = fullClone(ctx, srcDB, dEnv, srcRefHashes, branch, remoteName, singleBranch) } else { - if branch == "" { - branch = "main" // NM4 - } - - if remoteName == "" { - remoteName = "origin" // NM4 - } - - singleBranch = true - err = shallowClone(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth) + checkedOutCommit, err = shallowCloneDataPull(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth) } if err != nil { @@ -187,108 +189,6 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) } - // Step 2) Get all the refs from the remote. These branch refs will be translated to remote branch refs, tags will - // be preserved, and all other refs will be ignored. - srcRefHashes, err := srcDB.GetRefsWithHashes(ctx) - if err != nil { - return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) - } - - branches := make([]ref.DoltRef, 0, len(srcRefHashes)) - for _, refHash := range srcRefHashes { - if refHash.Ref.GetType() == ref.BranchRefType { - br := refHash.Ref.(ref.BranchRef) - branches = append(branches, br) - } - } - if branch == "" { - branch = env.GetDefaultBranch(dEnv, branches) - } - - // If we couldn't find a branch but the repo cloned successfully, it's empty. Initialize it instead of pulling from - // the remote. - if branch == "" { - if err = InitEmptyClonedRepo(ctx, dEnv); err != nil { - return nil - } - branch = env.GetDefaultInitBranch(dEnv.Config) - } - - var checkedOutCommit *doltdb.Commit - - if depth <= 0 { - cs, _ := doltdb.NewCommitSpec(branch) - optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToGetBranch, branch, err.Error()) - } - cm, ok := optCmt.ToCommit() - if !ok { - return doltdb.ErrUnexpectedGhostCommit - } - checkedOutCommit = cm - - err = dEnv.DoltDB.DeleteAllRefs(ctx) - if err != nil { - return err - } - - // Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved. - for _, refHash := range srcRefHashes { - if refHash.Ref.GetType() == ref.BranchRefType { - br := refHash.Ref.(ref.BranchRef) - if !singleBranch || br.GetPath() == branch { - remoteRef := ref.NewRemoteRef(remoteName, br.GetPath()) - err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash) - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error()) - - } - } - if br.GetPath() == branch { - // This is the only local branch after the clone is complete. - err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash) - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error()) - } - } - } else if refHash.Ref.GetType() == ref.TagRefType { - tr := refHash.Ref.(ref.TagRef) - err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash) - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error()) - } - } - } - } else { - // After the fetch approach, we just need to create the local branch. The single remote branch already exists. - for _, refHash := range srcRefHashes { - if refHash.Ref.GetType() == ref.BranchRefType { - br := refHash.Ref.(ref.BranchRef) - if br.GetPath() == branch { - // This is the only local branch after the clone is complete. - err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash) - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error()) - } - - cs, _ := doltdb.NewCommitSpec(refHash.Hash.String()) - optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) - if err != nil { - return fmt.Errorf("%w: %s; %s", ErrFailedToGetBranch, branch, err.Error()) - } - - cm, ok := optCmt.ToCommit() - if !ok { - return doltdb.ErrUnexpectedGhostCommit // NM4 - Maybe a custom message? Def going to test on this one. - } - checkedOutCommit = cm - } - } - } - - } - // TODO: make this interface take a DoltRef and marshal it automatically err = dEnv.RepoStateWriter().SetCWBHeadRef(ctx, ref.MarshalableRef{Ref: ref.NewBranchRef(branch)}) if err != nil { @@ -321,7 +221,29 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s return nil } -func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, dEnv *env.DoltEnv) error { +// getSrcRefs returns the refs from the source database and the branch to check out. The input branch is used if it is +// not empty, otherwise the default branch is determined and returned. +func getSrcRefs(ctx context.Context, branch string, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv) ([]doltdb.RefWithHash, string, error) { + srcRefHashes, err := srcDB.GetRefsWithHashes(ctx) + if err != nil { + return nil, "", err + } + + branches := make([]ref.DoltRef, 0, len(srcRefHashes)) + for _, refHash := range srcRefHashes { + if refHash.Ref.GetType() == ref.BranchRefType { + br := refHash.Ref.(ref.BranchRef) + branches = append(branches, br) + } + } + if branch == "" { + branch = env.GetDefaultBranch(dEnv, branches) + } + + return srcRefHashes, branch, nil +} + +func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv, srcRefHashes []doltdb.RefWithHash, branch, remoteName string, singleBranch bool) (*doltdb.Commit, error) { eventCh := make(chan pull.TableFileEvent, 128) wg := &sync.WaitGroup{} wg.Add(1) @@ -333,29 +255,97 @@ func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch str err := srcDB.Clone(ctx, dEnv.DoltDB, eventCh) close(eventCh) - wg.Wait() - return err + cs, _ := doltdb.NewCommitSpec(branch) + optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) + if err != nil { + return nil, err + } + cm, ok := optCmt.ToCommit() + if !ok { + return nil, doltdb.ErrUnexpectedGhostCommit + } + + err = dEnv.DoltDB.DeleteAllRefs(ctx) + if err != nil { + return nil, err + } + + // Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved. + for _, refHash := range srcRefHashes { + if refHash.Ref.GetType() == ref.BranchRefType { + br := refHash.Ref.(ref.BranchRef) + if !singleBranch || br.GetPath() == branch { + remoteRef := ref.NewRemoteRef(remoteName, br.GetPath()) + err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash) + if err != nil { + return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error()) + + } + } + if br.GetPath() == branch { + // This is the only local branch after the clone is complete. + err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash) + if err != nil { + return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error()) + } + } + } else if refHash.Ref.GetType() == ref.TagRefType { + tr := refHash.Ref.(ref.TagRef) + err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash) + if err != nil { + return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error()) + } + } + } + + return cm, nil } -func shallowClone(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) error { +// shallowCloneDataPull is a shallow clone specific helper function to pull only the data required to show the given branch +// at the depth given. +func shallowCloneDataPull(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) (*doltdb.Commit, error) { remotes, err := destData.Rsr.GetRemotes() if err != nil { - return err + return nil, err } remote, ok := remotes.Get(remoteName) if !ok { // By the time we get to this point, the remote should be created, so this should never happen. - return fmt.Errorf("remote %s not found", remoteName) + return nil, fmt.Errorf("remote %s not found", remoteName) } - if branch == "" { - branch = env.DefaultInitBranch - } specs, err := env.ParseRefSpecs([]string{branch}, destData.Rsr, remote) + if err != nil { + return nil, err + } - return ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth) + err = ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth) + if err != nil { + return nil, err + } + + // After the fetch approach, we just need to create the local branch. The single remote branch already exists. + br := ref.NewBranchRef(branch) + + cmt, err := srcDB.ResolveCommitRef(ctx, br) + if err != nil { + return nil, err + } + + hsh, err := cmt.HashOf() + if err != nil { + return nil, err + } + + // This is the only local branch after the clone is complete. + err = destData.Ddb.SetHead(ctx, br, hsh) + if err != nil { + return nil, err + } + + return cmt, nil } // InitEmptyClonedRepo inits an empty, newly cloned repo. This would be unnecessary if we properly initialized the diff --git a/integration-tests/bats/remotes.bats b/integration-tests/bats/remotes.bats index dc29ebd4b6..c9b79cd6b1 100644 --- a/integration-tests/bats/remotes.bats +++ b/integration-tests/bats/remotes.bats @@ -417,7 +417,7 @@ SQL run dolt clone http://localhost:50051/test-org/empty [ "$status" -eq 1 ] [[ "$output" =~ "clone failed" ]] || false - [[ "$output" =~ "remote at that url contains no Dolt data" ]] || false + [[ "$output" =~ "branch not found: main" ]] || false # NM4 - Not ideal. Should be "remote at that url contains no Dolt data" } @test "remotes: clone a non-existent remote" { @@ -426,7 +426,7 @@ SQL run dolt clone http://localhost:50051/foo/bar [ "$status" -eq 1 ] [[ "$output" =~ "clone failed" ]] || false - [[ "$output" =~ "remote at that url contains no Dolt data" ]] || false + [[ "$output" =~ "branch not found: main" ]] || false # NM4 - Not ideal. Should be "remote at that url contains no Dolt data" } @test "remotes: clone a different branch than main" {