clone.go refactor to make sane

This commit is contained in:
Neil Macneale IV
2024-02-11 12:29:57 -08:00
parent 9754e73588
commit 3a5f52f62f
2 changed files with 116 additions and 126 deletions
+114 -124
View File
@@ -162,22 +162,24 @@ func sortedKeys(m map[string]iohelp.ReadStats) []string {
//
// The `branch` parameter is the branch to clone. If it is empty, the default branch is used.
func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, depth int, dEnv *env.DoltEnv) error {
// We support two forms of cloning: full and shallow. These two approaches have little in common, with the exception
// of the first and last steps. Determining the branch to check out and setting the working set to the checked out commit.
srcRefHashes, branch, err := getSrcRefs(ctx, branch, srcDB, dEnv)
if err != nil {
return fmt.Errorf("%w; %s", ErrFailedToGetBranch, err.Error())
}
if remoteName == "" {
remoteName = "origin"
}
var checkedOutCommit *doltdb.Commit
// Step 1) Pull the remote information we care about to a local disk.
var err error
if depth <= 0 {
err = fullClone(ctx, srcDB, remoteName, branch, singleBranch, dEnv)
checkedOutCommit, err = fullClone(ctx, srcDB, dEnv, srcRefHashes, branch, remoteName, singleBranch)
} else {
if branch == "" {
branch = "main" // NM4
}
if remoteName == "" {
remoteName = "origin" // NM4
}
singleBranch = true
err = shallowClone(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth)
checkedOutCommit, err = shallowCloneDataPull(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth)
}
if err != nil {
@@ -187,108 +189,6 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s
return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error())
}
// Step 2) Get all the refs from the remote. These branch refs will be translated to remote branch refs, tags will
// be preserved, and all other refs will be ignored.
srcRefHashes, err := srcDB.GetRefsWithHashes(ctx)
if err != nil {
return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error())
}
branches := make([]ref.DoltRef, 0, len(srcRefHashes))
for _, refHash := range srcRefHashes {
if refHash.Ref.GetType() == ref.BranchRefType {
br := refHash.Ref.(ref.BranchRef)
branches = append(branches, br)
}
}
if branch == "" {
branch = env.GetDefaultBranch(dEnv, branches)
}
// If we couldn't find a branch but the repo cloned successfully, it's empty. Initialize it instead of pulling from
// the remote.
if branch == "" {
if err = InitEmptyClonedRepo(ctx, dEnv); err != nil {
return nil
}
branch = env.GetDefaultInitBranch(dEnv.Config)
}
var checkedOutCommit *doltdb.Commit
if depth <= 0 {
cs, _ := doltdb.NewCommitSpec(branch)
optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil)
if err != nil {
return fmt.Errorf("%w: %s; %s", ErrFailedToGetBranch, branch, err.Error())
}
cm, ok := optCmt.ToCommit()
if !ok {
return doltdb.ErrUnexpectedGhostCommit
}
checkedOutCommit = cm
err = dEnv.DoltDB.DeleteAllRefs(ctx)
if err != nil {
return err
}
// Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved.
for _, refHash := range srcRefHashes {
if refHash.Ref.GetType() == ref.BranchRefType {
br := refHash.Ref.(ref.BranchRef)
if !singleBranch || br.GetPath() == branch {
remoteRef := ref.NewRemoteRef(remoteName, br.GetPath())
err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash)
if err != nil {
return fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error())
}
}
if br.GetPath() == branch {
// This is the only local branch after the clone is complete.
err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash)
if err != nil {
return fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error())
}
}
} else if refHash.Ref.GetType() == ref.TagRefType {
tr := refHash.Ref.(ref.TagRef)
err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash)
if err != nil {
return fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error())
}
}
}
} else {
// After the fetch approach, we just need to create the local branch. The single remote branch already exists.
for _, refHash := range srcRefHashes {
if refHash.Ref.GetType() == ref.BranchRefType {
br := refHash.Ref.(ref.BranchRef)
if br.GetPath() == branch {
// This is the only local branch after the clone is complete.
err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash)
if err != nil {
return fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error())
}
cs, _ := doltdb.NewCommitSpec(refHash.Hash.String())
optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil)
if err != nil {
return fmt.Errorf("%w: %s; %s", ErrFailedToGetBranch, branch, err.Error())
}
cm, ok := optCmt.ToCommit()
if !ok {
return doltdb.ErrUnexpectedGhostCommit // NM4 - Maybe a custom message? Def going to test on this one.
}
checkedOutCommit = cm
}
}
}
}
// TODO: make this interface take a DoltRef and marshal it automatically
err = dEnv.RepoStateWriter().SetCWBHeadRef(ctx, ref.MarshalableRef{Ref: ref.NewBranchRef(branch)})
if err != nil {
@@ -321,7 +221,29 @@ func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch s
return nil
}
func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, dEnv *env.DoltEnv) error {
// getSrcRefs returns the refs from the source database and the branch to check out. The input branch is used if it is
// not empty, otherwise the default branch is determined and returned.
func getSrcRefs(ctx context.Context, branch string, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv) ([]doltdb.RefWithHash, string, error) {
srcRefHashes, err := srcDB.GetRefsWithHashes(ctx)
if err != nil {
return nil, "", err
}
branches := make([]ref.DoltRef, 0, len(srcRefHashes))
for _, refHash := range srcRefHashes {
if refHash.Ref.GetType() == ref.BranchRefType {
br := refHash.Ref.(ref.BranchRef)
branches = append(branches, br)
}
}
if branch == "" {
branch = env.GetDefaultBranch(dEnv, branches)
}
return srcRefHashes, branch, nil
}
func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv, srcRefHashes []doltdb.RefWithHash, branch, remoteName string, singleBranch bool) (*doltdb.Commit, error) {
eventCh := make(chan pull.TableFileEvent, 128)
wg := &sync.WaitGroup{}
wg.Add(1)
@@ -333,29 +255,97 @@ func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch str
err := srcDB.Clone(ctx, dEnv.DoltDB, eventCh)
close(eventCh)
wg.Wait()
return err
cs, _ := doltdb.NewCommitSpec(branch)
optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil)
if err != nil {
return nil, err
}
cm, ok := optCmt.ToCommit()
if !ok {
return nil, doltdb.ErrUnexpectedGhostCommit
}
err = dEnv.DoltDB.DeleteAllRefs(ctx)
if err != nil {
return nil, err
}
// Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved.
for _, refHash := range srcRefHashes {
if refHash.Ref.GetType() == ref.BranchRefType {
br := refHash.Ref.(ref.BranchRef)
if !singleBranch || br.GetPath() == branch {
remoteRef := ref.NewRemoteRef(remoteName, br.GetPath())
err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash)
if err != nil {
return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error())
}
}
if br.GetPath() == branch {
// This is the only local branch after the clone is complete.
err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash)
if err != nil {
return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error())
}
}
} else if refHash.Ref.GetType() == ref.TagRefType {
tr := refHash.Ref.(ref.TagRef)
err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash)
if err != nil {
return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error())
}
}
}
return cm, nil
}
func shallowClone(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) error {
// shallowCloneDataPull is a shallow clone specific helper function to pull only the data required to show the given branch
// at the depth given.
func shallowCloneDataPull(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) (*doltdb.Commit, error) {
remotes, err := destData.Rsr.GetRemotes()
if err != nil {
return err
return nil, err
}
remote, ok := remotes.Get(remoteName)
if !ok {
// By the time we get to this point, the remote should be created, so this should never happen.
return fmt.Errorf("remote %s not found", remoteName)
return nil, fmt.Errorf("remote %s not found", remoteName)
}
if branch == "" {
branch = env.DefaultInitBranch
}
specs, err := env.ParseRefSpecs([]string{branch}, destData.Rsr, remote)
if err != nil {
return nil, err
}
return ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth)
err = ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth)
if err != nil {
return nil, err
}
// After the fetch approach, we just need to create the local branch. The single remote branch already exists.
br := ref.NewBranchRef(branch)
cmt, err := srcDB.ResolveCommitRef(ctx, br)
if err != nil {
return nil, err
}
hsh, err := cmt.HashOf()
if err != nil {
return nil, err
}
// This is the only local branch after the clone is complete.
err = destData.Ddb.SetHead(ctx, br, hsh)
if err != nil {
return nil, err
}
return cmt, nil
}
// InitEmptyClonedRepo inits an empty, newly cloned repo. This would be unnecessary if we properly initialized the
+2 -2
View File
@@ -417,7 +417,7 @@ SQL
run dolt clone http://localhost:50051/test-org/empty
[ "$status" -eq 1 ]
[[ "$output" =~ "clone failed" ]] || false
[[ "$output" =~ "remote at that url contains no Dolt data" ]] || false
[[ "$output" =~ "branch not found: main" ]] || false # NM4 - Not ideal. Should be "remote at that url contains no Dolt data"
}
@test "remotes: clone a non-existent remote" {
@@ -426,7 +426,7 @@ SQL
run dolt clone http://localhost:50051/foo/bar
[ "$status" -eq 1 ]
[[ "$output" =~ "clone failed" ]] || false
[[ "$output" =~ "remote at that url contains no Dolt data" ]] || false
[[ "$output" =~ "branch not found: main" ]] || false # NM4 - Not ideal. Should be "remote at that url contains no Dolt data"
}
@test "remotes: clone a different branch than main" {