/go/store/blobstore/{git_blobstore,internal}: add git internal write api, refactor reads a bit

This commit is contained in:
coffeegoddd☕️✨
2026-02-04 11:10:39 -08:00
parent 41944c8e3a
commit f0aaed72a0
6 changed files with 354 additions and 158 deletions
+8 -7
View File
@@ -34,6 +34,7 @@ type GitBlobstore struct {
gitDir string
ref string
runner *git.Runner
read git.ReadAPI
}
var _ Blobstore = (*GitBlobstore)(nil)
@@ -45,7 +46,7 @@ func NewGitBlobstore(gitDir, ref string) (*GitBlobstore, error) {
if err != nil {
return nil, err
}
return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r}, nil
return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r, read: git.NewReadAPIImpl(r)}, nil
}
func (gbs *GitBlobstore) Path() string {
@@ -57,14 +58,14 @@ func (gbs *GitBlobstore) Exists(ctx context.Context, key string) (bool, error) {
if err != nil {
return false, err
}
commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref)
commit, ok, err := gbs.read.TryResolveRefCommit(ctx, gbs.ref)
if err != nil {
return false, err
}
if !ok {
return false, nil
}
_, err = git.ResolvePathBlob(ctx, gbs.runner, commit, key)
_, err = gbs.read.ResolvePathBlob(ctx, commit, key)
if err != nil {
if git.IsPathNotFound(err) {
return false, nil
@@ -79,7 +80,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.
if err != nil {
return nil, 0, "", err
}
commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref)
commit, ok, err := gbs.read.TryResolveRefCommit(ctx, gbs.ref)
if err != nil {
return nil, 0, "", err
}
@@ -92,7 +93,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.
return nil, 0, "", &git.RefNotFoundError{Ref: gbs.ref}
}
blobOID, err := git.ResolvePathBlob(ctx, gbs.runner, commit, key)
blobOID, err := gbs.read.ResolvePathBlob(ctx, commit, key)
if err != nil {
if git.IsPathNotFound(err) {
return nil, 0, commit.String(), NotFound{Key: key}
@@ -100,7 +101,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.
return nil, 0, commit.String(), err
}
sz, err := git.BlobSize(ctx, gbs.runner, blobOID)
sz, err := gbs.read.BlobSize(ctx, blobOID)
if err != nil {
return nil, 0, commit.String(), err
}
@@ -108,7 +109,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.
// TODO(gitblobstore): This streaming implementation is correct but may be slow for workloads
// that do many small ranged reads (e.g. table index/footer reads). Consider caching/materializing
// blobs to a local file (or using a batched git cat-file mode) to serve ranges efficiently.
rc, err := git.BlobReader(ctx, gbs.runner, blobOID)
rc, err := gbs.read.BlobReader(ctx, blobOID)
if err != nil {
return nil, 0, commit.String(), err
}
+20
View File
@@ -0,0 +1,20 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package git
// OID is a git object id in hex (typically 40-char SHA1).
type OID string
func (o OID) String() string { return string(o) }
+20 -151
View File
@@ -15,162 +15,31 @@
package git
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strconv"
"strings"
)
// OID is a git object id in hex (typically 40-char SHA1).
type OID string
// ReadAPI defines the git plumbing operations used by read paths. Like WriteAPI,
// it is designed to support swapping implementations (e.g. git CLI vs a Go git library)
// while keeping callers (such as GitBlobstore) stable.
type ReadAPI interface {
// TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist.
TryResolveRefCommit(ctx context.Context, ref string) (oid OID, ok bool, err error)
func (o OID) String() string { return string(o) }
// ResolveRefCommit resolves |ref| to a commit OID, returning RefNotFoundError if missing.
ResolveRefCommit(ctx context.Context, ref string) (OID, error)
// TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist.
func TryResolveRefCommit(ctx context.Context, r *Runner, ref string) (oid OID, ok bool, err error) {
out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}")
if err == nil {
s := strings.TrimSpace(string(out))
if s == "" {
// Shouldn't happen, but treat as missing.
return "", false, nil
}
return OID(s), true, nil
}
// ResolvePathBlob resolves |path| within |commit| to a blob OID.
// It returns PathNotFoundError if the path does not exist, and NotBlobError if it
// resolves to a non-blob object.
ResolvePathBlob(ctx context.Context, commit OID, path string) (OID, error)
if isRefNotFoundErr(err) {
return "", false, nil
}
return "", false, err
}
// ResolveRefCommit resolves |ref| to a commit OID.
func ResolveRefCommit(ctx context.Context, r *Runner, ref string) (OID, error) {
oid, ok, err := TryResolveRefCommit(ctx, r, ref)
if err != nil {
return "", err
}
if !ok {
return "", &RefNotFoundError{Ref: ref}
}
return oid, nil
}
// ResolvePathBlob resolves |path| within |commit| to a blob OID.
// It returns PathNotFoundError if the path does not exist, and NotBlobError if the
// path resolves to a non-blob object (e.g. a tree).
func ResolvePathBlob(ctx context.Context, r *Runner, commit OID, path string) (OID, error) {
spec := commit.String() + ":" + path
out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec)
if err != nil {
if isPathNotFoundErr(err) {
return "", &PathNotFoundError{Commit: commit.String(), Path: path}
}
return "", err
}
oid := strings.TrimSpace(string(out))
if oid == "" {
return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec)
}
typ, err := CatFileType(ctx, r, OID(oid))
if err != nil {
return "", err
}
if typ != "blob" {
return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ}
}
return OID(oid), nil
}
// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit").
func CatFileType(ctx context.Context, r *Runner, oid OID) (string, error) {
out, err := r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String())
if err != nil {
return "", err
}
return strings.TrimSpace(string(out)), nil
}
// BlobSize returns the size in bytes of the blob object |oid|.
func BlobSize(ctx context.Context, r *Runner, oid OID) (int64, error) {
out, err := r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String())
if err != nil {
return 0, err
}
s := strings.TrimSpace(string(out))
n, err := strconv.ParseInt(s, 10, 64)
if err != nil {
return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err)
}
return n, nil
}
// BlobReader returns a reader for blob contents. The returned ReadCloser will wait for
// the git process to exit when closed, returning a CmdError if the process fails.
func BlobReader(ctx context.Context, r *Runner, oid OID) (io.ReadCloser, error) {
rc, _, err := r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String())
return rc, err
}
func isRefNotFoundErr(err error) bool {
ce, ok := err.(*CmdError)
if !ok {
return false
}
// For `git rev-parse --verify --quiet <ref>^{commit}`, a missing ref typically yields exit 1 and no output.
if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 {
return true
}
// Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check.
msg := strings.ToLower(string(ce.Output))
return strings.Contains(msg, "needed a single revision") ||
strings.Contains(msg, "unknown revision") ||
strings.Contains(msg, "not a valid object name")
}
func isPathNotFoundErr(err error) bool {
ce, ok := err.(*CmdError)
if !ok {
return false
}
if ce.ExitCode == 128 || ce.ExitCode == 1 {
msg := strings.ToLower(string(ce.Output))
// Common patterns:
// - "fatal: Path 'x' does not exist in 'HEAD'"
// - "fatal: invalid object name 'HEAD:x'"
// - "fatal: Needed a single revision"
// - "fatal: ambiguous argument '...': unknown revision or path not in the working tree."
if strings.Contains(msg, "does not exist in") ||
strings.Contains(msg, "invalid object name") ||
strings.Contains(msg, "needed a single revision") ||
strings.Contains(msg, "unknown revision or path not in the working tree") {
return true
}
}
return false
}
// ReadAllBytes is a small helper for read-path callers that want a whole object.
// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests.
func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) {
rc, err := BlobReader(ctx, r, oid)
if err != nil {
return nil, err
}
defer rc.Close()
return io.ReadAll(rc)
}
// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error.
// Mostly useful for callers that want to compare against context cancellation.
func NormalizeGitPlumbingError(err error) error {
var ce *CmdError
if errors.As(err, &ce) && ce.Cause != nil {
return ce.Cause
}
return err
// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit").
CatFileType(ctx context.Context, oid OID) (string, error)
// BlobSize returns the size in bytes of the blob object |oid|.
BlobSize(ctx context.Context, oid OID) (int64, error)
// BlobReader returns a reader for blob contents.
BlobReader(ctx context.Context, oid OID) (io.ReadCloser, error)
}
@@ -0,0 +1,151 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package git
import (
"bytes"
"context"
"fmt"
"io"
"strconv"
"strings"
)
// ReadAPIImpl implements ReadAPI using the git CLI plumbing commands, via Runner.
type ReadAPIImpl struct {
r *Runner
}
var _ ReadAPI = (*ReadAPIImpl)(nil)
func NewReadAPIImpl(r *Runner) *ReadAPIImpl {
return &ReadAPIImpl{r: r}
}
func (a *ReadAPIImpl) TryResolveRefCommit(ctx context.Context, ref string) (oid OID, ok bool, err error) {
out, err := a.r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}")
if err == nil {
s := strings.TrimSpace(string(out))
if s == "" {
// Shouldn't happen, but treat as missing.
return "", false, nil
}
return OID(s), true, nil
}
if isRefNotFoundErr(err) {
return "", false, nil
}
return "", false, err
}
func (a *ReadAPIImpl) ResolveRefCommit(ctx context.Context, ref string) (OID, error) {
oid, ok, err := a.TryResolveRefCommit(ctx, ref)
if err != nil {
return "", err
}
if !ok {
return "", &RefNotFoundError{Ref: ref}
}
return oid, nil
}
func (a *ReadAPIImpl) ResolvePathBlob(ctx context.Context, commit OID, path string) (OID, error) {
spec := commit.String() + ":" + path
out, err := a.r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec)
if err != nil {
if isPathNotFoundErr(err) {
return "", &PathNotFoundError{Commit: commit.String(), Path: path}
}
return "", err
}
oid := strings.TrimSpace(string(out))
if oid == "" {
return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec)
}
typ, err := a.CatFileType(ctx, OID(oid))
if err != nil {
return "", err
}
if typ != "blob" {
return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ}
}
return OID(oid), nil
}
func (a *ReadAPIImpl) CatFileType(ctx context.Context, oid OID) (string, error) {
out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String())
if err != nil {
return "", err
}
return strings.TrimSpace(string(out)), nil
}
func (a *ReadAPIImpl) BlobSize(ctx context.Context, oid OID) (int64, error) {
out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String())
if err != nil {
return 0, err
}
s := strings.TrimSpace(string(out))
n, err := strconv.ParseInt(s, 10, 64)
if err != nil {
return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err)
}
return n, nil
}
func (a *ReadAPIImpl) BlobReader(ctx context.Context, oid OID) (io.ReadCloser, error) {
rc, _, err := a.r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String())
return rc, err
}
func isRefNotFoundErr(err error) bool {
ce, ok := err.(*CmdError)
if !ok {
return false
}
// For `git rev-parse --verify --quiet <ref>^{commit}`, a missing ref typically yields exit 1 and no output.
if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 {
return true
}
// Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check.
msg := strings.ToLower(string(ce.Output))
return strings.Contains(msg, "needed a single revision") ||
strings.Contains(msg, "unknown revision") ||
strings.Contains(msg, "not a valid object name")
}
func isPathNotFoundErr(err error) bool {
ce, ok := err.(*CmdError)
if !ok {
return false
}
if ce.ExitCode == 128 || ce.ExitCode == 1 {
msg := strings.ToLower(string(ce.Output))
// Common patterns:
// - "fatal: Path 'x' does not exist in 'HEAD'"
// - "fatal: invalid object name 'HEAD:x'"
// - "fatal: Needed a single revision"
// - "fatal: ambiguous argument '...': unknown revision or path not in the working tree."
if strings.Contains(msg, "does not exist in") ||
strings.Contains(msg, "invalid object name") ||
strings.Contains(msg, "needed a single revision") ||
strings.Contains(msg, "unknown revision or path not in the working tree") {
return true
}
}
return false
}
+42
View File
@@ -0,0 +1,42 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package git
import (
"context"
"errors"
"io"
)
// ReadAllBytes is a small helper for read-path callers that want a whole object.
// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests.
func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) {
rc, err := NewReadAPIImpl(r).BlobReader(ctx, oid)
if err != nil {
return nil, err
}
defer rc.Close()
return io.ReadAll(rc)
}
// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error.
// Mostly useful for callers that want to compare against context cancellation.
func NormalizeGitPlumbingError(err error) error {
var ce *CmdError
if errors.As(err, &ce) && ce.Cause != nil {
return ce.Cause
}
return err
}
@@ -0,0 +1,113 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package git
import (
"context"
"fmt"
"strings"
)
// WriteAPIImpl implements WriteAPI using the git CLI plumbing commands, via Runner.
// It performs updates without requiring a working tree checkout.
type WriteAPIImpl struct {
r *Runner
}
var _ WriteAPI = (*WriteAPIImpl)(nil)
func NewWriteAPIImpl(r *Runner) *WriteAPIImpl {
return &WriteAPIImpl{r: r}
}
func (p *WriteAPIImpl) ReadTree(ctx context.Context, commit OID, indexFile string) error {
_, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "read-tree", commit.String()+"^{tree}")
return err
}
func (p *WriteAPIImpl) ReadTreeEmpty(ctx context.Context, indexFile string) error {
_, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "read-tree", "--empty")
return err
}
func (p *WriteAPIImpl) UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error {
_, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "update-index", "--add", "--cacheinfo", mode, oid.String(), path)
return err
}
func (p *WriteAPIImpl) WriteTree(ctx context.Context, indexFile string) (OID, error) {
out, err := p.r.Run(ctx, RunOptions{IndexFile: indexFile}, "write-tree")
if err != nil {
return "", err
}
oid := strings.TrimSpace(string(out))
if oid == "" {
return "", fmt.Errorf("git write-tree returned empty oid")
}
return OID(oid), nil
}
func (p *WriteAPIImpl) CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) {
args := []string{"commit-tree", tree.String(), "-m", message}
if parent != nil && parent.String() != "" {
args = append(args, "-p", parent.String())
}
var env []string
if author != nil {
if author.Name != "" {
env = append(env,
"GIT_AUTHOR_NAME="+author.Name,
"GIT_COMMITTER_NAME="+author.Name,
)
}
if author.Email != "" {
env = append(env,
"GIT_AUTHOR_EMAIL="+author.Email,
"GIT_COMMITTER_EMAIL="+author.Email,
)
}
}
out, err := p.r.Run(ctx, RunOptions{Env: env}, args...)
if err != nil {
return "", err
}
oid := strings.TrimSpace(string(out))
if oid == "" {
return "", fmt.Errorf("git commit-tree returned empty oid")
}
return OID(oid), nil
}
func (p *WriteAPIImpl) UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error {
args := []string{"update-ref"}
if msg != "" {
args = append(args, "-m", msg)
}
args = append(args, ref, newOID.String(), oldOID.String())
_, err := p.r.Run(ctx, RunOptions{}, args...)
return err
}
func (p *WriteAPIImpl) UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error {
args := []string{"update-ref"}
if msg != "" {
args = append(args, "-m", msg)
}
args = append(args, ref, newOID.String())
_, err := p.r.Run(ctx, RunOptions{}, args...)
return err
}