/{go,integration-tests}: set max size

This commit is contained in:
coffeegoddd☕️✨
2026-02-11 15:22:52 -08:00
parent 7e35f80506
commit f19ddbfac2
7 changed files with 155 additions and 1 deletions

View File

@@ -168,7 +168,13 @@ func addRemote(sqlCtx *sql.Context, queryist cli.Queryist, dEnv *env.DoltEnv, ap
}
if len(params) == 0 {
err := callSQLRemoteAdd(sqlCtx, queryist, remoteName, remoteUrl)
// For git remotes, ensure the stored URL is the normalized git+* dbfactory URL (e.g. scp-style ssh becomes
// git+ssh://..., and local paths become git+file://...).
urlToStore := remoteUrl
if strings.HasPrefix(strings.ToLower(scheme), "git+") {
urlToStore = absRemoteUrl
}
err := callSQLRemoteAdd(sqlCtx, queryist, remoteName, urlToStore)
if err != nil {
return errhand.BuildDError("error: Unable to add remote.").AddCause(err).Build()
}

View File

@@ -17,6 +17,7 @@ package commands
import (
"fmt"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/assert"
@@ -146,3 +147,20 @@ func TestParseRemoteArgs_GitRef(t *testing.T) {
assert.Nil(t, verr)
assert.Equal(t, "refs/dolt/custom", params[dbfactory.GitRefParam])
}
func TestRemoteAdd_StoresNormalizedGitUrl(t *testing.T) {
// This mirrors the behavior in addRemote(): for git remotes, we must store the normalized git+* URL
// (in particular, scp-style ssh should never be stored as a schemeless string).
scheme := dbfactory.GitSSHScheme
original := "git@github.com:timsehn/dolt-in-git.git"
normalized, ok, err := env.NormalizeGitRemoteUrl(original)
assert.NoError(t, err)
assert.True(t, ok)
assert.Equal(t, "git+ssh://git@github.com/timsehn/dolt-in-git.git", normalized)
urlToStore := original
if strings.HasPrefix(strings.ToLower(scheme), "git+") {
urlToStore = normalized
}
assert.Equal(t, normalized, urlToStore)
}

View File

@@ -44,6 +44,8 @@ const (
defaultGitRemoteName = "origin"
)
var ErrGitRemoteHasNoBranches = errors.New("git remote has no branches")
// GitCacheRootProvider provides the local Dolt repo root for per-repo git remote caches.
// Implementations should return ok=false when no repo root is available.
type GitCacheRootProvider interface {
@@ -116,6 +118,9 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor
if err := ensureGitRemoteURL(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil {
return nil, nil, nil, err
}
if err := ensureRemoteHasBranches(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil {
return nil, nil, nil, err
}
q := nbs.NewUnlimitedMemQuotaProvider()
cs, err := nbs.NewGitStore(ctx, nbf.VersionString(), cacheRepo, ref, blobstore.GitBlobstoreOptions{RemoteName: remoteName}, defaultMemTableSize, q)
@@ -129,6 +134,17 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor
return db, vrw, ns, nil
}
func ensureRemoteHasBranches(ctx context.Context, gitDir string, remoteName string, remoteURL string) error {
out, err := runGitInDir(ctx, gitDir, "ls-remote", "--heads", "--", remoteName)
if err != nil {
return err
}
if strings.TrimSpace(out) == "" {
return fmt.Errorf("%w: cannot push to %q; initialize the repository with an initial branch/commit first", ErrGitRemoteHasNoBranches, remoteURL)
}
return nil
}
func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (remoteURL *url.URL, ref string, err error) {
if urlObj == nil {
return nil, "", fmt.Errorf("nil url")

View File

@@ -64,6 +64,8 @@ func TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite(t *testing.T
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
_, err = remoteRepo.SetRefToTree(ctx, "refs/heads/main", map[string][]byte{"README": []byte("seed\n")}, "seed")
require.NoError(t, err)
localRepoRoot := shortTempDir(t)
@@ -121,6 +123,8 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
_, err = remoteRepo.SetRefToTree(ctx, "refs/heads/main", map[string][]byte{"README": []byte("seed\n")}, "seed")
require.NoError(t, err)
remotePath := filepath.ToSlash(remoteRepo.GitDir)
urlStr := "git+file://" + remotePath
@@ -185,3 +189,24 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
require.Equal(t, "clientB\n", string(gotB.Data()))
require.NoError(t, dbA2.Close())
}
func TestGitRemoteFactory_GitFile_RemoteWithNoBranchesFails(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
localRepoRoot := shortTempDir(t)
remotePath := filepath.ToSlash(remoteRepo.GitDir)
urlStr := "git+file://" + remotePath
params := map[string]interface{}{
GitCacheRootParam: localRepoRoot,
}
_, _, _, err = CreateDB(ctx, types.Format_Default, urlStr, params)
require.Error(t, err)
require.ErrorIs(t, err, ErrGitRemoteHasNoBranches)
}

View File

@@ -112,3 +112,55 @@ func TestNBS_GitBlobstore_EmptyRemote_FirstManifestUpdateBootstrapsRef(t *testin
require.NoError(t, writeManifest(&buf, contents))
require.NotEmpty(t, buf.Bytes())
}
func TestNBS_NewGitStore_DefaultsMaxPartSizeTo50MB(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git")
require.NoError(t, err)
_, err = remoteRepo.SetRefToTree(ctx, blobstore.DoltDataRef, nil, "seed empty")
require.NoError(t, err)
localRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/local.git")
require.NoError(t, err)
cmd := exec.CommandContext(ctx, "git", "--git-dir", localRepo.GitDir, "remote", "add", "origin", remoteRepo.GitDir)
out, err := cmd.CombinedOutput()
require.NoError(t, err, "git remote add failed: %s", string(out))
store, err := NewGitStore(ctx, types.Format_DOLT.VersionString(), localRepo.GitDir, blobstore.DoltDataRef, blobstore.GitBlobstoreOptions{}, 0, NewUnlimitedMemQuotaProvider())
require.NoError(t, err)
defer store.Close()
// Assert the underlying blobstore is a GitBlobstore and that chunked writes are enabled by default.
bsp, ok := store.persister.(*blobstorePersister)
require.True(t, ok, "expected persister to be *blobstorePersister, got %T", store.persister)
gbs, ok := bsp.bs.(*blobstore.GitBlobstore)
require.True(t, ok, "expected blobstore to be *blobstore.GitBlobstore, got %T", bsp.bs)
// Use a totalSize larger than the default 50MB threshold, but provide a tiny reader.
// planPutWrites decides chunked-vs-inline based on totalSize, and should pick chunked mode
// when the default max part size is enabled.
_, err = gbs.Put(ctx, "k", int64(50*1024*1024+1), bytes.NewReader([]byte("hi")))
require.NoError(t, err)
// On the remote, key "k" should be represented as a tree containing part "0001".
cmd = exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "rev-parse", "--verify", "--quiet", blobstore.DoltDataRef+"^{commit}")
headBytes, err := cmd.CombinedOutput()
require.NoError(t, err, "git rev-parse failed: %s", string(headBytes))
commit := string(bytes.TrimSpace(headBytes))
cmd = exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "ls-tree", commit, "k")
lsOut, err := cmd.CombinedOutput()
require.NoError(t, err, "git ls-tree failed: %s", string(lsOut))
require.Contains(t, string(lsOut), "\tk\n")
require.Contains(t, string(lsOut), "tree")
cmd = exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "ls-tree", commit, "k/0001")
partOut, err := cmd.CombinedOutput()
require.NoError(t, err, "git ls-tree part failed: %s", string(partOut))
require.Contains(t, string(partOut), "0001")
}

View File

@@ -628,6 +628,12 @@ func NewOCISStore(ctx context.Context, nbfVerStr string, bucketName, path string
func NewGitStore(ctx context.Context, nbfVerStr string, gitDir string, ref string, opts blobstore.GitBlobstoreOptions, memTableSize uint64, q MemoryQuotaProvider) (*NomsBlockStore, error) {
cacheOnce.Do(makeGlobalCaches)
// A Git remote may reject large blobs. To keep git-backed remotes broadly usable by default, enable
// chunked-object writes with a conservative max part size unless the caller explicitly overrides it.
if opts.MaxPartSize == 0 {
opts.MaxPartSize = 50 * 1024 * 1024
}
bs, err := blobstore.NewGitBlobstoreWithOptions(gitDir, ref, opts)
if err != nil {
return nil, err
@@ -640,6 +646,10 @@ func NewGitStore(ctx context.Context, nbfVerStr string, gitDir string, ref strin
func NewNoConjoinGitStore(ctx context.Context, nbfVerStr string, gitDir string, ref string, opts blobstore.GitBlobstoreOptions, memTableSize uint64, q MemoryQuotaProvider) (*NomsBlockStore, error) {
cacheOnce.Do(makeGlobalCaches)
if opts.MaxPartSize == 0 {
opts.MaxPartSize = 50 * 1024 * 1024
}
bs, err := blobstore.NewGitBlobstoreWithOptions(gitDir, ref, opts)
if err != nil {
return nil, err

View File

@@ -18,9 +18,31 @@ teardown() {
teardown_common
}
seed_git_remote_branch() {
# Create an initial branch on an otherwise-empty bare git remote.
# Dolt git remotes require at least one git branch to exist on the remote.
local remote_git_dir="$1"
local branch="${2:-main}"
mkdir seed-repo
cd seed-repo
git init >/dev/null
git config user.email "bats@email.fake"
git config user.name "Bats Tests"
echo "seed" > README
git add README
git commit -m "seed" >/dev/null
git branch -M "$branch"
git remote add origin "../$remote_git_dir"
git push origin "$branch" >/dev/null
cd ..
rm -rf seed-repo
}
@test "remotes-git: smoke push/clone/push-back/pull" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main
mkdir repo1
cd repo1
@@ -58,6 +80,7 @@ teardown() {
@test "remotes-git: empty remote bootstrap creates refs/dolt/data" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main
# Assert the dolt data ref doesn't exist yet.
run git --git-dir remote.git show-ref refs/dolt/data
@@ -82,6 +105,7 @@ teardown() {
@test "remotes-git: pull also fetches branches from git remote" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main
mkdir repo1
cd repo1
@@ -115,6 +139,7 @@ teardown() {
@test "remotes-git: pull fetches but does not merge other branches" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main
mkdir repo1
cd repo1
@@ -156,6 +181,7 @@ teardown() {
@test "remotes-git: custom --ref writes to configured dolt data ref" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main
mkdir repo1
cd repo1
@@ -192,6 +218,7 @@ teardown() {
@test "remotes-git: push works with per-repo git cache under .dolt/" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main
mkdir repo1
cd repo1