From f0aaed72a0156b472fa1baf002eefee9b174398e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 4 Feb 2026 11:10:39 -0800 Subject: [PATCH] /go/store/blobstore/{git_blobstore,internal}: add git internal write api, refactor reads a bit --- go/store/blobstore/git_blobstore.go | 15 +- go/store/blobstore/internal/git/oid.go | 20 ++ go/store/blobstore/internal/git/read.go | 171 ++---------------- go/store/blobstore/internal/git/read_impl.go | 151 ++++++++++++++++ go/store/blobstore/internal/git/util.go | 42 +++++ go/store/blobstore/internal/git/write_impl.go | 113 ++++++++++++ 6 files changed, 354 insertions(+), 158 deletions(-) create mode 100644 go/store/blobstore/internal/git/oid.go create mode 100644 go/store/blobstore/internal/git/read_impl.go create mode 100644 go/store/blobstore/internal/git/util.go create mode 100644 go/store/blobstore/internal/git/write_impl.go diff --git a/go/store/blobstore/git_blobstore.go b/go/store/blobstore/git_blobstore.go index c1e7055f2f..d37031d6a1 100644 --- a/go/store/blobstore/git_blobstore.go +++ b/go/store/blobstore/git_blobstore.go @@ -34,6 +34,7 @@ type GitBlobstore struct { gitDir string ref string runner *git.Runner + read git.ReadAPI } var _ Blobstore = (*GitBlobstore)(nil) @@ -45,7 +46,7 @@ func NewGitBlobstore(gitDir, ref string) (*GitBlobstore, error) { if err != nil { return nil, err } - return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r}, nil + return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r, read: git.NewReadAPIImpl(r)}, nil } func (gbs *GitBlobstore) Path() string { @@ -57,14 +58,14 @@ func (gbs *GitBlobstore) Exists(ctx context.Context, key string) (bool, error) { if err != nil { return false, err } - commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref) + commit, ok, err := gbs.read.TryResolveRefCommit(ctx, gbs.ref) if err != nil { return false, err } if !ok { return false, nil } - _, err = git.ResolvePathBlob(ctx, gbs.runner, commit, key) + _, err = gbs.read.ResolvePathBlob(ctx, commit, key) if err != nil { if git.IsPathNotFound(err) { return false, nil @@ -79,7 +80,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. if err != nil { return nil, 0, "", err } - commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref) + commit, ok, err := gbs.read.TryResolveRefCommit(ctx, gbs.ref) if err != nil { return nil, 0, "", err } @@ -92,7 +93,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. return nil, 0, "", &git.RefNotFoundError{Ref: gbs.ref} } - blobOID, err := git.ResolvePathBlob(ctx, gbs.runner, commit, key) + blobOID, err := gbs.read.ResolvePathBlob(ctx, commit, key) if err != nil { if git.IsPathNotFound(err) { return nil, 0, commit.String(), NotFound{Key: key} @@ -100,7 +101,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. return nil, 0, commit.String(), err } - sz, err := git.BlobSize(ctx, gbs.runner, blobOID) + sz, err := gbs.read.BlobSize(ctx, blobOID) if err != nil { return nil, 0, commit.String(), err } @@ -108,7 +109,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. // TODO(gitblobstore): This streaming implementation is correct but may be slow for workloads // that do many small ranged reads (e.g. table index/footer reads). Consider caching/materializing // blobs to a local file (or using a batched git cat-file mode) to serve ranges efficiently. - rc, err := git.BlobReader(ctx, gbs.runner, blobOID) + rc, err := gbs.read.BlobReader(ctx, blobOID) if err != nil { return nil, 0, commit.String(), err } diff --git a/go/store/blobstore/internal/git/oid.go b/go/store/blobstore/internal/git/oid.go new file mode 100644 index 0000000000..2b3827faa2 --- /dev/null +++ b/go/store/blobstore/internal/git/oid.go @@ -0,0 +1,20 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +// OID is a git object id in hex (typically 40-char SHA1). +type OID string + +func (o OID) String() string { return string(o) } diff --git a/go/store/blobstore/internal/git/read.go b/go/store/blobstore/internal/git/read.go index 4a6b16f09a..33a570c488 100644 --- a/go/store/blobstore/internal/git/read.go +++ b/go/store/blobstore/internal/git/read.go @@ -15,162 +15,31 @@ package git import ( - "bytes" "context" - "errors" - "fmt" "io" - "strconv" - "strings" ) -// OID is a git object id in hex (typically 40-char SHA1). -type OID string +// ReadAPI defines the git plumbing operations used by read paths. Like WriteAPI, +// it is designed to support swapping implementations (e.g. git CLI vs a Go git library) +// while keeping callers (such as GitBlobstore) stable. +type ReadAPI interface { + // TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist. + TryResolveRefCommit(ctx context.Context, ref string) (oid OID, ok bool, err error) -func (o OID) String() string { return string(o) } + // ResolveRefCommit resolves |ref| to a commit OID, returning RefNotFoundError if missing. + ResolveRefCommit(ctx context.Context, ref string) (OID, error) -// TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist. -func TryResolveRefCommit(ctx context.Context, r *Runner, ref string) (oid OID, ok bool, err error) { - out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}") - if err == nil { - s := strings.TrimSpace(string(out)) - if s == "" { - // Shouldn't happen, but treat as missing. - return "", false, nil - } - return OID(s), true, nil - } + // ResolvePathBlob resolves |path| within |commit| to a blob OID. + // It returns PathNotFoundError if the path does not exist, and NotBlobError if it + // resolves to a non-blob object. + ResolvePathBlob(ctx context.Context, commit OID, path string) (OID, error) - if isRefNotFoundErr(err) { - return "", false, nil - } - return "", false, err -} - -// ResolveRefCommit resolves |ref| to a commit OID. -func ResolveRefCommit(ctx context.Context, r *Runner, ref string) (OID, error) { - oid, ok, err := TryResolveRefCommit(ctx, r, ref) - if err != nil { - return "", err - } - if !ok { - return "", &RefNotFoundError{Ref: ref} - } - return oid, nil -} - -// ResolvePathBlob resolves |path| within |commit| to a blob OID. -// It returns PathNotFoundError if the path does not exist, and NotBlobError if the -// path resolves to a non-blob object (e.g. a tree). -func ResolvePathBlob(ctx context.Context, r *Runner, commit OID, path string) (OID, error) { - spec := commit.String() + ":" + path - out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec) - if err != nil { - if isPathNotFoundErr(err) { - return "", &PathNotFoundError{Commit: commit.String(), Path: path} - } - return "", err - } - oid := strings.TrimSpace(string(out)) - if oid == "" { - return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec) - } - - typ, err := CatFileType(ctx, r, OID(oid)) - if err != nil { - return "", err - } - if typ != "blob" { - return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ} - } - return OID(oid), nil -} - -// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit"). -func CatFileType(ctx context.Context, r *Runner, oid OID) (string, error) { - out, err := r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String()) - if err != nil { - return "", err - } - return strings.TrimSpace(string(out)), nil -} - -// BlobSize returns the size in bytes of the blob object |oid|. -func BlobSize(ctx context.Context, r *Runner, oid OID) (int64, error) { - out, err := r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String()) - if err != nil { - return 0, err - } - s := strings.TrimSpace(string(out)) - n, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err) - } - return n, nil -} - -// BlobReader returns a reader for blob contents. The returned ReadCloser will wait for -// the git process to exit when closed, returning a CmdError if the process fails. -func BlobReader(ctx context.Context, r *Runner, oid OID) (io.ReadCloser, error) { - rc, _, err := r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String()) - return rc, err -} - -func isRefNotFoundErr(err error) bool { - ce, ok := err.(*CmdError) - if !ok { - return false - } - // For `git rev-parse --verify --quiet ^{commit}`, a missing ref typically yields exit 1 and no output. - if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 { - return true - } - // Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check. - msg := strings.ToLower(string(ce.Output)) - return strings.Contains(msg, "needed a single revision") || - strings.Contains(msg, "unknown revision") || - strings.Contains(msg, "not a valid object name") -} - -func isPathNotFoundErr(err error) bool { - ce, ok := err.(*CmdError) - if !ok { - return false - } - if ce.ExitCode == 128 || ce.ExitCode == 1 { - msg := strings.ToLower(string(ce.Output)) - // Common patterns: - // - "fatal: Path 'x' does not exist in 'HEAD'" - // - "fatal: invalid object name 'HEAD:x'" - // - "fatal: Needed a single revision" - // - "fatal: ambiguous argument '...': unknown revision or path not in the working tree." - if strings.Contains(msg, "does not exist in") || - strings.Contains(msg, "invalid object name") || - strings.Contains(msg, "needed a single revision") || - strings.Contains(msg, "unknown revision or path not in the working tree") { - return true - } - } - return false -} - -// ReadAllBytes is a small helper for read-path callers that want a whole object. -// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests. -func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) { - rc, err := BlobReader(ctx, r, oid) - if err != nil { - return nil, err - } - defer rc.Close() - return io.ReadAll(rc) -} - -// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error. -// Mostly useful for callers that want to compare against context cancellation. -func NormalizeGitPlumbingError(err error) error { - var ce *CmdError - if errors.As(err, &ce) && ce.Cause != nil { - return ce.Cause - } - return err + // CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit"). + CatFileType(ctx context.Context, oid OID) (string, error) + + // BlobSize returns the size in bytes of the blob object |oid|. + BlobSize(ctx context.Context, oid OID) (int64, error) + + // BlobReader returns a reader for blob contents. + BlobReader(ctx context.Context, oid OID) (io.ReadCloser, error) } diff --git a/go/store/blobstore/internal/git/read_impl.go b/go/store/blobstore/internal/git/read_impl.go new file mode 100644 index 0000000000..1d6f1c9588 --- /dev/null +++ b/go/store/blobstore/internal/git/read_impl.go @@ -0,0 +1,151 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "bytes" + "context" + "fmt" + "io" + "strconv" + "strings" +) + +// ReadAPIImpl implements ReadAPI using the git CLI plumbing commands, via Runner. +type ReadAPIImpl struct { + r *Runner +} + +var _ ReadAPI = (*ReadAPIImpl)(nil) + +func NewReadAPIImpl(r *Runner) *ReadAPIImpl { + return &ReadAPIImpl{r: r} +} + +func (a *ReadAPIImpl) TryResolveRefCommit(ctx context.Context, ref string) (oid OID, ok bool, err error) { + out, err := a.r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}") + if err == nil { + s := strings.TrimSpace(string(out)) + if s == "" { + // Shouldn't happen, but treat as missing. + return "", false, nil + } + return OID(s), true, nil + } + + if isRefNotFoundErr(err) { + return "", false, nil + } + return "", false, err +} + +func (a *ReadAPIImpl) ResolveRefCommit(ctx context.Context, ref string) (OID, error) { + oid, ok, err := a.TryResolveRefCommit(ctx, ref) + if err != nil { + return "", err + } + if !ok { + return "", &RefNotFoundError{Ref: ref} + } + return oid, nil +} + +func (a *ReadAPIImpl) ResolvePathBlob(ctx context.Context, commit OID, path string) (OID, error) { + spec := commit.String() + ":" + path + out, err := a.r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec) + if err != nil { + if isPathNotFoundErr(err) { + return "", &PathNotFoundError{Commit: commit.String(), Path: path} + } + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec) + } + + typ, err := a.CatFileType(ctx, OID(oid)) + if err != nil { + return "", err + } + if typ != "blob" { + return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ} + } + return OID(oid), nil +} + +func (a *ReadAPIImpl) CatFileType(ctx context.Context, oid OID) (string, error) { + out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String()) + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil +} + +func (a *ReadAPIImpl) BlobSize(ctx context.Context, oid OID) (int64, error) { + out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String()) + if err != nil { + return 0, err + } + s := strings.TrimSpace(string(out)) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err) + } + return n, nil +} + +func (a *ReadAPIImpl) BlobReader(ctx context.Context, oid OID) (io.ReadCloser, error) { + rc, _, err := a.r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String()) + return rc, err +} + +func isRefNotFoundErr(err error) bool { + ce, ok := err.(*CmdError) + if !ok { + return false + } + // For `git rev-parse --verify --quiet ^{commit}`, a missing ref typically yields exit 1 and no output. + if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 { + return true + } + // Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check. + msg := strings.ToLower(string(ce.Output)) + return strings.Contains(msg, "needed a single revision") || + strings.Contains(msg, "unknown revision") || + strings.Contains(msg, "not a valid object name") +} + +func isPathNotFoundErr(err error) bool { + ce, ok := err.(*CmdError) + if !ok { + return false + } + if ce.ExitCode == 128 || ce.ExitCode == 1 { + msg := strings.ToLower(string(ce.Output)) + // Common patterns: + // - "fatal: Path 'x' does not exist in 'HEAD'" + // - "fatal: invalid object name 'HEAD:x'" + // - "fatal: Needed a single revision" + // - "fatal: ambiguous argument '...': unknown revision or path not in the working tree." + if strings.Contains(msg, "does not exist in") || + strings.Contains(msg, "invalid object name") || + strings.Contains(msg, "needed a single revision") || + strings.Contains(msg, "unknown revision or path not in the working tree") { + return true + } + } + return false +} diff --git a/go/store/blobstore/internal/git/util.go b/go/store/blobstore/internal/git/util.go new file mode 100644 index 0000000000..0a00ebfd98 --- /dev/null +++ b/go/store/blobstore/internal/git/util.go @@ -0,0 +1,42 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "context" + "errors" + "io" +) + +// ReadAllBytes is a small helper for read-path callers that want a whole object. +// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests. +func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) { + rc, err := NewReadAPIImpl(r).BlobReader(ctx, oid) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + +// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error. +// Mostly useful for callers that want to compare against context cancellation. +func NormalizeGitPlumbingError(err error) error { + var ce *CmdError + if errors.As(err, &ce) && ce.Cause != nil { + return ce.Cause + } + return err +} diff --git a/go/store/blobstore/internal/git/write_impl.go b/go/store/blobstore/internal/git/write_impl.go new file mode 100644 index 0000000000..e1801bf142 --- /dev/null +++ b/go/store/blobstore/internal/git/write_impl.go @@ -0,0 +1,113 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "context" + "fmt" + "strings" +) + +// WriteAPIImpl implements WriteAPI using the git CLI plumbing commands, via Runner. +// It performs updates without requiring a working tree checkout. +type WriteAPIImpl struct { + r *Runner +} + +var _ WriteAPI = (*WriteAPIImpl)(nil) + +func NewWriteAPIImpl(r *Runner) *WriteAPIImpl { + return &WriteAPIImpl{r: r} +} + +func (p *WriteAPIImpl) ReadTree(ctx context.Context, commit OID, indexFile string) error { + _, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "read-tree", commit.String()+"^{tree}") + return err +} + +func (p *WriteAPIImpl) ReadTreeEmpty(ctx context.Context, indexFile string) error { + _, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "read-tree", "--empty") + return err +} + +func (p *WriteAPIImpl) UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error { + _, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "update-index", "--add", "--cacheinfo", mode, oid.String(), path) + return err +} + +func (p *WriteAPIImpl) WriteTree(ctx context.Context, indexFile string) (OID, error) { + out, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "write-tree") + if err != nil { + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git write-tree returned empty oid") + } + return OID(oid), nil +} + +func (p *WriteAPIImpl) CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) { + args := []string{"commit-tree", tree.String(), "-m", message} + if parent != nil && parent.String() != "" { + args = append(args, "-p", parent.String()) + } + + var env []string + if author != nil { + if author.Name != "" { + env = append(env, + "GIT_AUTHOR_NAME="+author.Name, + "GIT_COMMITTER_NAME="+author.Name, + ) + } + if author.Email != "" { + env = append(env, + "GIT_AUTHOR_EMAIL="+author.Email, + "GIT_COMMITTER_EMAIL="+author.Email, + ) + } + } + + out, err := p.r.Run(ctx, RunOptions{Env: env}, args...) + if err != nil { + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git commit-tree returned empty oid") + } + return OID(oid), nil +} + +func (p *WriteAPIImpl) UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error { + args := []string{"update-ref"} + if msg != "" { + args = append(args, "-m", msg) + } + args = append(args, ref, newOID.String(), oldOID.String()) + _, err := p.r.Run(ctx, RunOptions{}, args...) + return err +} + +func (p *WriteAPIImpl) UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error { + args := []string{"update-ref"} + if msg != "" { + args = append(args, "-m", msg) + } + args = append(args, ref, newOID.String()) + _, err := p.r.Run(ctx, RunOptions{}, args...) + return err +}