mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-23 21:59:01 -05:00
Merge pull request #10409 from dolthub/db/gitblobstore
Add read-only GitBlobstore
This commit is contained in:
+4
-1
@@ -21,4 +21,7 @@ integration-tests/bats/batsee_results
|
||||
CLAUDE.md
|
||||
|
||||
*~
|
||||
.dir-locals.el
|
||||
.dir-locals.el
|
||||
.beads
|
||||
.gitattributes
|
||||
|
||||
|
||||
@@ -0,0 +1,217 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package blobstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
git "github.com/dolthub/dolt/go/store/blobstore/internal/git"
|
||||
)
|
||||
|
||||
// GitBlobstore is a Blobstore implementation backed by a git repository's object
|
||||
// database (bare repo or .git directory). It stores keys as paths within the tree
|
||||
// of the commit referenced by a git ref (e.g. refs/dolt/data).
|
||||
//
|
||||
// This initial implementation is intentionally READ-ONLY. Write-path methods
|
||||
// (Put / CheckAndPut / Concatenate) return an explicit unimplemented error while
|
||||
// we lock down read behavior for manifests and table files.
|
||||
type GitBlobstore struct {
|
||||
gitDir string
|
||||
ref string
|
||||
runner *git.Runner
|
||||
}
|
||||
|
||||
var _ Blobstore = (*GitBlobstore)(nil)
|
||||
|
||||
// NewGitBlobstore creates a new read-only GitBlobstore rooted at |gitDir| and |ref|.
|
||||
// |gitDir| should point at a bare repo directory or a .git directory.
|
||||
func NewGitBlobstore(gitDir, ref string) (*GitBlobstore, error) {
|
||||
r, err := git.NewRunner(gitDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r}, nil
|
||||
}
|
||||
|
||||
func (gbs *GitBlobstore) Path() string {
|
||||
return fmt.Sprintf("%s@%s", gbs.gitDir, gbs.ref)
|
||||
}
|
||||
|
||||
func (gbs *GitBlobstore) Exists(ctx context.Context, key string) (bool, error) {
|
||||
key, err := normalizeGitTreePath(key)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
_, err = git.ResolvePathBlob(ctx, gbs.runner, commit, key)
|
||||
if err != nil {
|
||||
if git.IsPathNotFound(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.ReadCloser, uint64, string, error) {
|
||||
key, err := normalizeGitTreePath(key)
|
||||
if err != nil {
|
||||
return nil, 0, "", err
|
||||
}
|
||||
commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref)
|
||||
if err != nil {
|
||||
return nil, 0, "", err
|
||||
}
|
||||
if !ok {
|
||||
// If the ref doesn't exist, treat the manifest as missing (empty store),
|
||||
// but surface a hard error for other keys: the store itself is missing.
|
||||
if key == "manifest" {
|
||||
return nil, 0, "", NotFound{Key: key}
|
||||
}
|
||||
return nil, 0, "", &git.RefNotFoundError{Ref: gbs.ref}
|
||||
}
|
||||
|
||||
blobOID, err := git.ResolvePathBlob(ctx, gbs.runner, commit, key)
|
||||
if err != nil {
|
||||
if git.IsPathNotFound(err) {
|
||||
return nil, 0, commit.String(), NotFound{Key: key}
|
||||
}
|
||||
return nil, 0, commit.String(), err
|
||||
}
|
||||
|
||||
sz, err := git.BlobSize(ctx, gbs.runner, blobOID)
|
||||
if err != nil {
|
||||
return nil, 0, commit.String(), err
|
||||
}
|
||||
|
||||
// TODO(gitblobstore): This streaming implementation is correct but may be slow for workloads
|
||||
// that do many small ranged reads (e.g. table index/footer reads). Consider caching/materializing
|
||||
// blobs to a local file (or using a batched git cat-file mode) to serve ranges efficiently.
|
||||
rc, err := git.BlobReader(ctx, gbs.runner, blobOID)
|
||||
if err != nil {
|
||||
return nil, 0, commit.String(), err
|
||||
}
|
||||
|
||||
// Implement BlobRange by slicing the streamed blob contents.
|
||||
if br.isAllRange() {
|
||||
return rc, uint64(sz), commit.String(), nil
|
||||
}
|
||||
|
||||
pos := br.positiveRange(sz)
|
||||
if pos.offset < 0 || pos.offset > sz {
|
||||
_ = rc.Close()
|
||||
return nil, uint64(sz), commit.String(), fmt.Errorf("invalid BlobRange offset %d for blob of size %d", pos.offset, sz)
|
||||
}
|
||||
if pos.length < 0 {
|
||||
_ = rc.Close()
|
||||
return nil, uint64(sz), commit.String(), fmt.Errorf("invalid BlobRange length %d", pos.length)
|
||||
}
|
||||
if pos.length == 0 {
|
||||
// Read from offset to end.
|
||||
pos.length = sz - pos.offset
|
||||
}
|
||||
// Clamp to end (defensive; positiveRange should already do this).
|
||||
if pos.offset+pos.length > sz {
|
||||
pos.length = sz - pos.offset
|
||||
}
|
||||
|
||||
// Skip to offset.
|
||||
if pos.offset > 0 {
|
||||
if _, err := io.CopyN(io.Discard, rc, pos.offset); err != nil {
|
||||
_ = rc.Close()
|
||||
return nil, uint64(sz), commit.String(), err
|
||||
}
|
||||
}
|
||||
|
||||
return &limitReadCloser{r: io.LimitReader(rc, pos.length), c: rc}, uint64(sz), commit.String(), nil
|
||||
}
|
||||
|
||||
type limitReadCloser struct {
|
||||
r io.Reader
|
||||
c io.Closer
|
||||
}
|
||||
|
||||
func (l *limitReadCloser) Read(p []byte) (int, error) { return l.r.Read(p) }
|
||||
func (l *limitReadCloser) Close() error { return l.c.Close() }
|
||||
|
||||
func (gbs *GitBlobstore) Put(ctx context.Context, key string, totalSize int64, reader io.Reader) (string, error) {
|
||||
if _, err := normalizeGitTreePath(key); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", fmt.Errorf("%w: GitBlobstore.Put", git.ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (gbs *GitBlobstore) CheckAndPut(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) {
|
||||
if _, err := normalizeGitTreePath(key); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", fmt.Errorf("%w: GitBlobstore.CheckAndPut", git.ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (gbs *GitBlobstore) Concatenate(ctx context.Context, key string, sources []string) (string, error) {
|
||||
if _, err := normalizeGitTreePath(key); err != nil {
|
||||
return "", err
|
||||
}
|
||||
for _, src := range sources {
|
||||
if _, err := normalizeGitTreePath(src); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("%w: GitBlobstore.Concatenate", git.ErrUnimplemented)
|
||||
}
|
||||
|
||||
// normalizeGitTreePath normalizes and validates a blobstore key for use as a
// git tree path. It returns the normalized key, or an error describing why the
// key was rejected.
//
// Rules:
//   - disallow NUL bytes anywhere in the key
//   - convert Windows-style separators: "\" -> "/"
//   - disallow the empty key
//   - disallow absolute paths (leading "/")
//   - disallow empty segments and trailing "/"
//   - disallow "." and ".." segments
func normalizeGitTreePath(key string) (string, error) {
	// Checking the whole key once covers every segment, so no per-segment
	// NUL check is needed below.
	if strings.ContainsRune(key, '\x00') {
		return "", fmt.Errorf("invalid git blobstore key (NUL byte): %q", key)
	}

	key = strings.ReplaceAll(key, "\\", "/")
	if key == "" {
		return "", fmt.Errorf("invalid git blobstore key (empty)")
	}
	if strings.HasPrefix(key, "/") {
		return "", fmt.Errorf("invalid git blobstore key (absolute path): %q", key)
	}

	for _, seg := range strings.Split(key, "/") {
		switch seg {
		case "":
			// Produced by "a//b" or a trailing "/".
			return "", fmt.Errorf("invalid git blobstore key (empty path segment): %q", key)
		case ".", "..":
			return "", fmt.Errorf("invalid git blobstore key (path traversal): %q", key)
		}
	}
	return key, nil
}
|
||||
@@ -0,0 +1,210 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package blobstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os/exec"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
git "github.com/dolthub/dolt/go/store/blobstore/internal/git"
|
||||
"github.com/dolthub/dolt/go/store/testutils/gitrepo"
|
||||
)
|
||||
|
||||
func TestGitBlobstore_RefMissingIsNotFound(t *testing.T) {
|
||||
if _, err := exec.LookPath("git"); err != nil {
|
||||
t.Skip("git not found on PATH")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git")
|
||||
require.NoError(t, err)
|
||||
|
||||
bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data")
|
||||
require.NoError(t, err)
|
||||
|
||||
ok, err := bs.Exists(ctx, "manifest")
|
||||
require.NoError(t, err)
|
||||
require.False(t, ok)
|
||||
|
||||
_, _, err = GetBytes(ctx, bs, "manifest", AllRange)
|
||||
require.Error(t, err)
|
||||
require.True(t, IsNotFoundError(err))
|
||||
|
||||
// For non-manifest keys, missing the ref is a hard error.
|
||||
_, _, _, err = bs.Get(ctx, "table", AllRange)
|
||||
require.Error(t, err)
|
||||
require.False(t, IsNotFoundError(err))
|
||||
var rnf *git.RefNotFoundError
|
||||
require.True(t, errors.As(err, &rnf))
|
||||
}
|
||||
|
||||
func TestGitBlobstore_ExistsAndGet_AllRange(t *testing.T) {
|
||||
if _, err := exec.LookPath("git"); err != nil {
|
||||
t.Skip("git not found on PATH")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git")
|
||||
require.NoError(t, err)
|
||||
|
||||
want := []byte("hello manifest\n")
|
||||
commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{
|
||||
"manifest": want,
|
||||
"dir/file": []byte("abc"),
|
||||
}, "seed")
|
||||
require.NoError(t, err)
|
||||
|
||||
bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data")
|
||||
require.NoError(t, err)
|
||||
|
||||
ok, err := bs.Exists(ctx, "manifest")
|
||||
require.NoError(t, err)
|
||||
require.True(t, ok)
|
||||
|
||||
ok, err = bs.Exists(ctx, "missing")
|
||||
require.NoError(t, err)
|
||||
require.False(t, ok)
|
||||
|
||||
// Validate key normalization: backslash -> slash.
|
||||
ok, err = bs.Exists(ctx, "dir\\file")
|
||||
require.NoError(t, err)
|
||||
require.True(t, ok)
|
||||
|
||||
got, ver, err := GetBytes(ctx, bs, "manifest", AllRange)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, commit, ver)
|
||||
require.Equal(t, want, got)
|
||||
|
||||
// Validate size + version on Get.
|
||||
rc, sz, ver2, err := bs.Get(ctx, "manifest", NewBlobRange(0, 5))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(len(want)), sz)
|
||||
require.Equal(t, commit, ver2)
|
||||
_ = rc.Close()
|
||||
}
|
||||
|
||||
func TestGitBlobstore_Get_NotFoundMissingKey(t *testing.T) {
|
||||
if _, err := exec.LookPath("git"); err != nil {
|
||||
t.Skip("git not found on PATH")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git")
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{
|
||||
"present": []byte("x"),
|
||||
}, "seed")
|
||||
require.NoError(t, err)
|
||||
|
||||
bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data")
|
||||
require.NoError(t, err)
|
||||
|
||||
_, _, err = GetBytes(ctx, bs, "missing", AllRange)
|
||||
require.Error(t, err)
|
||||
require.True(t, IsNotFoundError(err))
|
||||
}
|
||||
|
||||
func TestGitBlobstore_BlobRangeSemantics(t *testing.T) {
|
||||
if _, err := exec.LookPath("git"); err != nil {
|
||||
t.Skip("git not found on PATH")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git")
|
||||
require.NoError(t, err)
|
||||
|
||||
maxValue := int64(16 * 1024)
|
||||
testData := rangeData(0, maxValue)
|
||||
|
||||
commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{
|
||||
"range": testData,
|
||||
}, "range fixture")
|
||||
require.NoError(t, err)
|
||||
|
||||
bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data")
|
||||
require.NoError(t, err)
|
||||
|
||||
// full range
|
||||
got, ver, err := GetBytes(ctx, bs, "range", AllRange)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, commit, ver)
|
||||
require.Equal(t, rangeData(0, maxValue), got)
|
||||
|
||||
// first 2048 bytes (1024 shorts)
|
||||
got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(0, 2048))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, commit, ver)
|
||||
require.Equal(t, rangeData(0, 1024), got)
|
||||
|
||||
// bytes 2048..4096 of original
|
||||
got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(2*1024, 2*1024))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, commit, ver)
|
||||
require.Equal(t, rangeData(1024, 2048), got)
|
||||
|
||||
// last 2048 bytes
|
||||
got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(-2*1024, 0))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, commit, ver)
|
||||
require.Equal(t, rangeData(maxValue-1024, maxValue), got)
|
||||
|
||||
// tail slice: beginning 2048 bytes from end, size 512
|
||||
got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(-2*1024, 512))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, commit, ver)
|
||||
require.Equal(t, rangeData(maxValue-1024, maxValue-768), got)
|
||||
}
|
||||
|
||||
func TestGitBlobstore_InvalidKeysError(t *testing.T) {
|
||||
if _, err := exec.LookPath("git"); err != nil {
|
||||
t.Skip("git not found on PATH")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git")
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{"ok": []byte("x")}, "seed")
|
||||
require.NoError(t, err)
|
||||
|
||||
bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data")
|
||||
require.NoError(t, err)
|
||||
|
||||
invalid := []string{
|
||||
"",
|
||||
"/abs",
|
||||
"../x",
|
||||
"a/../b",
|
||||
"a//b",
|
||||
"a/",
|
||||
".",
|
||||
"..",
|
||||
"a/./b",
|
||||
"a/\x00/b",
|
||||
}
|
||||
|
||||
for _, k := range invalid {
|
||||
_, err := bs.Exists(ctx, k)
|
||||
require.Error(t, err, "expected error for key %q", k)
|
||||
|
||||
_, _, _, err = bs.Get(ctx, k, AllRange)
|
||||
require.Error(t, err, "expected error for key %q", k)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package git
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// ErrUnimplemented is the sentinel returned by stubbed write-path APIs. It is
// exported on purpose so that higher layers (e.g. GitBlobstore) can wrap it or
// match against it.
var ErrUnimplemented = errors.New("unimplemented")
|
||||
|
||||
// RefNotFoundError reports that a git ref (e.g. refs/dolt/data) could not be
// resolved.
type RefNotFoundError struct {
	// Ref is the name of the ref that was looked up.
	Ref string
}

// Error implements the error interface.
func (e *RefNotFoundError) Error() string {
	return "git ref not found: " + e.Ref
}
|
||||
|
||||
// PathNotFoundError reports that a tree path could not be resolved inside a
// commit.
type PathNotFoundError struct {
	// Commit is the commit the lookup was made against.
	Commit string
	// Path is the tree path that was not found.
	Path string
}

// Error implements the error interface, formatting as "<commit>:<path>".
func (e *PathNotFoundError) Error() string {
	commit, path := e.Commit, e.Path
	return fmt.Sprintf("git path not found: %s:%s", commit, path)
}
|
||||
|
||||
// NotBlobError reports that a path resolved to an object that is not a blob.
type NotBlobError struct {
	// Commit is the commit the lookup was made against.
	Commit string
	// Path is the tree path that was resolved.
	Path string
	// Type is the object type that was found instead; may be empty.
	Type string
}

// Error implements the error interface. The object type is included in
// parentheses only when it is known.
func (e *NotBlobError) Error() string {
	if e.Type != "" {
		return fmt.Sprintf("git path is not a blob (%s): %s:%s", e.Type, e.Commit, e.Path)
	}
	return fmt.Sprintf("git path is not a blob: %s:%s", e.Commit, e.Path)
}
|
||||
|
||||
func IsRefNotFound(err error) bool {
|
||||
var e *RefNotFoundError
|
||||
return errors.As(err, &e)
|
||||
}
|
||||
|
||||
func IsPathNotFound(err error) bool {
|
||||
var e *PathNotFoundError
|
||||
return errors.As(err, &e)
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package git
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// OID is a git object id written in hex (typically a 40-character SHA1).
type OID string

// String returns the hex form of the object id.
func (o OID) String() string {
	return string(o)
}
|
||||
|
||||
// TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist.
|
||||
func TryResolveRefCommit(ctx context.Context, r *Runner, ref string) (oid OID, ok bool, err error) {
|
||||
out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}")
|
||||
if err == nil {
|
||||
s := strings.TrimSpace(string(out))
|
||||
if s == "" {
|
||||
// Shouldn't happen, but treat as missing.
|
||||
return "", false, nil
|
||||
}
|
||||
return OID(s), true, nil
|
||||
}
|
||||
|
||||
if isRefNotFoundErr(err) {
|
||||
return "", false, nil
|
||||
}
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
// ResolveRefCommit resolves |ref| to a commit OID.
|
||||
func ResolveRefCommit(ctx context.Context, r *Runner, ref string) (OID, error) {
|
||||
oid, ok, err := TryResolveRefCommit(ctx, r, ref)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if !ok {
|
||||
return "", &RefNotFoundError{Ref: ref}
|
||||
}
|
||||
return oid, nil
|
||||
}
|
||||
|
||||
// ResolvePathBlob resolves |path| within |commit| to a blob OID.
|
||||
// It returns PathNotFoundError if the path does not exist, and NotBlobError if the
|
||||
// path resolves to a non-blob object (e.g. a tree).
|
||||
func ResolvePathBlob(ctx context.Context, r *Runner, commit OID, path string) (OID, error) {
|
||||
spec := commit.String() + ":" + path
|
||||
out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec)
|
||||
if err != nil {
|
||||
if isPathNotFoundErr(err) {
|
||||
return "", &PathNotFoundError{Commit: commit.String(), Path: path}
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
oid := strings.TrimSpace(string(out))
|
||||
if oid == "" {
|
||||
return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec)
|
||||
}
|
||||
|
||||
typ, err := CatFileType(ctx, r, OID(oid))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if typ != "blob" {
|
||||
return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ}
|
||||
}
|
||||
return OID(oid), nil
|
||||
}
|
||||
|
||||
// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit").
|
||||
func CatFileType(ctx context.Context, r *Runner, oid OID) (string, error) {
|
||||
out, err := r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strings.TrimSpace(string(out)), nil
|
||||
}
|
||||
|
||||
// BlobSize returns the size in bytes of the blob object |oid|.
|
||||
func BlobSize(ctx context.Context, r *Runner, oid OID) (int64, error) {
|
||||
out, err := r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
s := strings.TrimSpace(string(out))
|
||||
n, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// BlobReader returns a reader for blob contents. The returned ReadCloser will wait for
|
||||
// the git process to exit when closed, returning a CmdError if the process fails.
|
||||
func BlobReader(ctx context.Context, r *Runner, oid OID) (io.ReadCloser, error) {
|
||||
rc, _, err := r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String())
|
||||
return rc, err
|
||||
}
|
||||
|
||||
func isRefNotFoundErr(err error) bool {
|
||||
ce, ok := err.(*CmdError)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
// For `git rev-parse --verify --quiet <ref>^{commit}`, a missing ref typically yields exit 1 and no output.
|
||||
if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 {
|
||||
return true
|
||||
}
|
||||
// Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check.
|
||||
msg := strings.ToLower(string(ce.Output))
|
||||
return strings.Contains(msg, "needed a single revision") ||
|
||||
strings.Contains(msg, "unknown revision") ||
|
||||
strings.Contains(msg, "not a valid object name")
|
||||
}
|
||||
|
||||
func isPathNotFoundErr(err error) bool {
|
||||
ce, ok := err.(*CmdError)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if ce.ExitCode == 128 || ce.ExitCode == 1 {
|
||||
msg := strings.ToLower(string(ce.Output))
|
||||
// Common patterns:
|
||||
// - "fatal: Path 'x' does not exist in 'HEAD'"
|
||||
// - "fatal: invalid object name 'HEAD:x'"
|
||||
// - "fatal: Needed a single revision"
|
||||
// - "fatal: ambiguous argument '...': unknown revision or path not in the working tree."
|
||||
if strings.Contains(msg, "does not exist in") ||
|
||||
strings.Contains(msg, "invalid object name") ||
|
||||
strings.Contains(msg, "needed a single revision") ||
|
||||
strings.Contains(msg, "unknown revision or path not in the working tree") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ReadAllBytes is a small helper for read-path callers that want a whole object.
|
||||
// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests.
|
||||
func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) {
|
||||
rc, err := BlobReader(ctx, r, oid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rc.Close()
|
||||
return io.ReadAll(rc)
|
||||
}
|
||||
|
||||
// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error.
|
||||
// Mostly useful for callers that want to compare against context cancellation.
|
||||
func NormalizeGitPlumbingError(err error) error {
|
||||
var ce *CmdError
|
||||
if errors.As(err, &ce) && ce.Cause != nil {
|
||||
return ce.Cause
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -0,0 +1,255 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package git provides helpers for invoking git plumbing commands against a bare
|
||||
// repository or .git directory without a working tree checkout.
|
||||
package git
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// maxCapturedOutputBytes caps how many bytes of captured git output are shown
// when an error message is formatted (64 KiB).
const maxCapturedOutputBytes = 64 * 1024
|
||||
|
||||
// Runner runs git commands with GIT_DIR set (and, per invocation, optionally
// GIT_INDEX_FILE). It targets git plumbing usage and should not need a working
// tree.
type Runner struct {
	// gitPath is the git executable to invoke.
	gitPath string
	// gitDir is the value exported as GIT_DIR.
	gitDir string
	// extraEnv is appended to os.Environ() for every command.
	extraEnv []string
}
|
||||
|
||||
// NewRunner creates a Runner using the git binary on PATH.
|
||||
func NewRunner(gitDir string) (*Runner, error) {
|
||||
p, err := exec.LookPath("git")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("git not found on PATH: %w", err)
|
||||
}
|
||||
return NewRunnerWithGitPath(gitDir, p), nil
|
||||
}
|
||||
|
||||
// NewRunnerWithGitPath creates a Runner using an explicit git binary path.
|
||||
func NewRunnerWithGitPath(gitDir, gitPath string) *Runner {
|
||||
return &Runner{
|
||||
gitPath: gitPath,
|
||||
gitDir: gitDir,
|
||||
}
|
||||
}
|
||||
|
||||
// WithExtraEnv returns a copy of r that appends env entries (e.g. "K=V") to all commands.
|
||||
func (r *Runner) WithExtraEnv(env ...string) *Runner {
|
||||
cp := *r
|
||||
cp.extraEnv = append(append([]string(nil), r.extraEnv...), env...)
|
||||
return &cp
|
||||
}
|
||||
|
||||
// RunOptions carries the per-invocation settings for one git command.
type RunOptions struct {
	// Dir is the working directory for the git process. Optional.
	Dir string

	// IndexFile sets GIT_INDEX_FILE for the git process. Optional.
	IndexFile string

	// Stdin provides stdin to the git process. Optional.
	Stdin io.Reader

	// Stdout and Stderr override output destinations. If both are nil, output
	// is captured and returned.
	Stdout io.Writer
	Stderr io.Writer

	// Env is appended to the process environment.
	Env []string
}
|
||||
|
||||
// CmdError represents a failed git invocation with captured output.
|
||||
type CmdError struct {
|
||||
Args []string
|
||||
Dir string
|
||||
ExitCode int
|
||||
Output []byte
|
||||
Cause error
|
||||
}
|
||||
|
||||
func (e *CmdError) Error() string {
|
||||
var b strings.Builder
|
||||
b.WriteString("git command failed")
|
||||
if e.ExitCode != 0 {
|
||||
b.WriteString(fmt.Sprintf(" (exit %d)", e.ExitCode))
|
||||
}
|
||||
if len(e.Args) > 0 {
|
||||
b.WriteString("\ncommand: git ")
|
||||
b.WriteString(strings.Join(e.Args, " "))
|
||||
}
|
||||
if e.Dir != "" {
|
||||
b.WriteString("\ndir: ")
|
||||
b.WriteString(e.Dir)
|
||||
}
|
||||
b.WriteString("\noutput:\n")
|
||||
b.WriteString(formatOutput(e.Output))
|
||||
if e.Cause != nil {
|
||||
b.WriteString("\nerror: ")
|
||||
b.WriteString(e.Cause.Error())
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (e *CmdError) Unwrap() error { return e.Cause }
|
||||
|
||||
// Run executes "git <args...>" with GIT_DIR set and returns captured combined output
|
||||
// when Stdout/Stderr are not supplied.
|
||||
func (r *Runner) Run(ctx context.Context, opts RunOptions, args ...string) ([]byte, error) {
|
||||
cmd := exec.CommandContext(ctx, r.gitPath, args...) //nolint:gosec // args are controlled by caller; used for internal plumbing.
|
||||
if opts.Dir != "" {
|
||||
cmd.Dir = opts.Dir
|
||||
}
|
||||
cmd.Env = r.env(opts)
|
||||
|
||||
if opts.Stdin != nil {
|
||||
cmd.Stdin = opts.Stdin
|
||||
}
|
||||
|
||||
// Capture combined output unless caller provided destinations.
|
||||
var buf bytes.Buffer
|
||||
if opts.Stdout == nil && opts.Stderr == nil {
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
} else {
|
||||
if opts.Stdout != nil {
|
||||
cmd.Stdout = opts.Stdout
|
||||
}
|
||||
if opts.Stderr != nil {
|
||||
cmd.Stderr = opts.Stderr
|
||||
} else if opts.Stdout != nil {
|
||||
// Reasonable default: if only Stdout is set, send stderr there too.
|
||||
cmd.Stderr = opts.Stdout
|
||||
}
|
||||
}
|
||||
|
||||
err := cmd.Run()
|
||||
out := buf.Bytes()
|
||||
if err == nil {
|
||||
return out, nil
|
||||
}
|
||||
|
||||
exitCode := 0
|
||||
var ee *exec.ExitError
|
||||
if errors.As(err, &ee) {
|
||||
exitCode = ee.ExitCode()
|
||||
}
|
||||
return out, &CmdError{
|
||||
Args: append([]string(nil), args...),
|
||||
Dir: cmd.Dir,
|
||||
ExitCode: exitCode,
|
||||
Output: out,
|
||||
Cause: err,
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts "git <args...>" and returns a ReadCloser for stdout.
|
||||
//
|
||||
// Resource management:
|
||||
// - Call Close() on the returned ReadCloser to ensure the underlying git process
|
||||
// is waited (cmd.Wait()) and resources are released.
|
||||
// - The returned *exec.Cmd is provided for advanced uses (e.g. signals), but most
|
||||
// callers should not call Wait() directly.
|
||||
func (r *Runner) Start(ctx context.Context, opts RunOptions, args ...string) (io.ReadCloser, *exec.Cmd, error) {
|
||||
cmd := exec.CommandContext(ctx, r.gitPath, args...) //nolint:gosec // args are controlled by caller; used for internal plumbing.
|
||||
if opts.Dir != "" {
|
||||
cmd.Dir = opts.Dir
|
||||
}
|
||||
cmd.Env = r.env(opts)
|
||||
if opts.Stdin != nil {
|
||||
cmd.Stdin = opts.Stdin
|
||||
}
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// Capture stderr into a buffer so failures have actionable output.
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
_ = stdout.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Wrap stdout so that Close also waits to avoid zombies if callers bail early.
|
||||
rc := &cmdReadCloser{
|
||||
r: stdout,
|
||||
cmd: cmd,
|
||||
stderr: &stderr,
|
||||
args: append([]string(nil), args...),
|
||||
dir: cmd.Dir,
|
||||
}
|
||||
return rc, cmd, nil
|
||||
}
|
||||
|
||||
type cmdReadCloser struct {
|
||||
r io.ReadCloser
|
||||
cmd *exec.Cmd
|
||||
stderr *bytes.Buffer
|
||||
args []string
|
||||
dir string
|
||||
}
|
||||
|
||||
func (c *cmdReadCloser) Read(p []byte) (int, error) { return c.r.Read(p) }
|
||||
|
||||
func (c *cmdReadCloser) Close() error {
|
||||
_ = c.r.Close()
|
||||
err := c.cmd.Wait()
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
exitCode := 0
|
||||
var ee *exec.ExitError
|
||||
if errors.As(err, &ee) {
|
||||
exitCode = ee.ExitCode()
|
||||
}
|
||||
return &CmdError{
|
||||
Args: c.args,
|
||||
Dir: c.dir,
|
||||
ExitCode: exitCode,
|
||||
Output: c.stderr.Bytes(),
|
||||
Cause: err,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Runner) env(opts RunOptions) []string {
|
||||
env := append([]string(nil), os.Environ()...)
|
||||
env = append(env, "GIT_DIR="+r.gitDir)
|
||||
if opts.IndexFile != "" {
|
||||
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
|
||||
}
|
||||
env = append(env, r.extraEnv...)
|
||||
env = append(env, opts.Env...)
|
||||
return env
|
||||
}
|
||||
|
||||
func formatOutput(out []byte) string {
|
||||
if len(out) == 0 {
|
||||
return "(no output)"
|
||||
}
|
||||
if len(out) <= maxCapturedOutputBytes {
|
||||
return strings.TrimRight(string(out), "\n")
|
||||
}
|
||||
trimmed := out[len(out)-maxCapturedOutputBytes:]
|
||||
return fmt.Sprintf("... (truncated; showing last %d bytes)\n%s", maxCapturedOutputBytes, strings.TrimRight(string(trimmed), "\n"))
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package git
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// WriteAPI defines the git plumbing operations needed for Approach A (temporary index
// via GIT_INDEX_FILE) to perform updates without a working tree checkout.
//
// This file intentionally does not implement these operations yet; the current
// GitBlobstore milestone is read-only. All methods on the default implementation
// (UnimplementedWriteAPI) return an error wrapping ErrUnimplemented.
type WriteAPI interface {
	// ReadTree populates |indexFile| with the entries from |commit|'s root tree.
	// Equivalent plumbing:
	//   GIT_DIR=... GIT_INDEX_FILE=<indexFile> git read-tree <commit>^{tree}
	ReadTree(ctx context.Context, commit OID, indexFile string) error

	// ReadTreeEmpty initializes |indexFile| to an empty index.
	// Equivalent plumbing:
	//   GIT_DIR=... GIT_INDEX_FILE=<indexFile> git read-tree --empty
	ReadTreeEmpty(ctx context.Context, indexFile string) error

	// UpdateIndexCacheInfo adds or replaces |path| in |indexFile| with the given blob |oid|
	// and file |mode| (passed to git as a string).
	// Equivalent plumbing:
	//   GIT_DIR=... GIT_INDEX_FILE=<indexFile> git update-index --add --cacheinfo <mode> <oid> <path>
	UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error

	// WriteTree writes a tree object from the contents of |indexFile| and returns its oid.
	// Equivalent plumbing:
	//   GIT_DIR=... GIT_INDEX_FILE=<indexFile> git write-tree
	WriteTree(ctx context.Context, indexFile string) (OID, error)

	// CommitTree creates a commit object from |tree| with optional |parent| (nil for none)
	// and commit |message|, and returns its oid. |author| supplies the identity for the
	// commit; how it is applied is left to the implementation.
	// Equivalent plumbing:
	//   GIT_DIR=... git commit-tree <tree> [-p <parent>] -m <message>
	CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error)

	// UpdateRefCAS atomically updates |ref| from |oldOID| to |newOID|, recording |msg|.
	// Equivalent plumbing:
	//   GIT_DIR=... git update-ref -m <msg> <ref> <newOID> <oldOID>
	UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error

	// UpdateRef updates |ref| to |newOID| without a compare-and-swap, recording |msg|.
	// Equivalent plumbing:
	//   GIT_DIR=... git update-ref -m <msg> <ref> <newOID>
	UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error
}
|
||||
|
||||
// Identity represents git author/committer metadata. A future implementation
// may set this via environment variables (GIT_AUTHOR_NAME, etc.).
type Identity struct {
	Name  string // name portion of the identity
	Email string // email portion of the identity
}
|
||||
|
||||
// UnimplementedWriteAPI is the default write API for the read-only milestone.
|
||||
// It can be embedded or returned by constructors to make write paths fail fast.
|
||||
type UnimplementedWriteAPI struct{}
|
||||
|
||||
var _ WriteAPI = UnimplementedWriteAPI{}
|
||||
|
||||
func (UnimplementedWriteAPI) ReadTree(ctx context.Context, commit OID, indexFile string) error {
|
||||
return fmt.Errorf("%w: ReadTree", ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (UnimplementedWriteAPI) ReadTreeEmpty(ctx context.Context, indexFile string) error {
|
||||
return fmt.Errorf("%w: ReadTreeEmpty", ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (UnimplementedWriteAPI) UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error {
|
||||
return fmt.Errorf("%w: UpdateIndexCacheInfo", ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (UnimplementedWriteAPI) WriteTree(ctx context.Context, indexFile string) (OID, error) {
|
||||
return "", fmt.Errorf("%w: WriteTree", ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (UnimplementedWriteAPI) CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) {
|
||||
return "", fmt.Errorf("%w: CommitTree", ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (UnimplementedWriteAPI) UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error {
|
||||
return fmt.Errorf("%w: UpdateRefCAS", ErrUnimplemented)
|
||||
}
|
||||
|
||||
func (UnimplementedWriteAPI) UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error {
|
||||
return fmt.Errorf("%w: UpdateRef", ErrUnimplemented)
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"os/exec"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/blobstore"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/testutils/gitrepo"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
// TestGitBlobstoreReadSmoke_ManifestAndTableAccessPatterns seeds a bare git
// repository with a manifest blob and a table-like blob under refs/dolt/data,
// then reads them back through a GitBlobstore using three access patterns:
// manifest parsing, a tail read with a negative BlobRange offset, and ranged
// ReadAtWithStats reads. The test is skipped when git is not on PATH.
func TestGitBlobstoreReadSmoke_ManifestAndTableAccessPatterns(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skip("git not found on PATH")
	}

	ctx := context.Background()
	repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git")
	require.NoError(t, err)

	// Seed a valid v5 manifest with no tables. This should allow NBS to open
	// without triggering any write paths.
	mc := manifestContents{
		nbfVers: types.Format_DOLT.VersionString(),
		lock:    hash.Of([]byte("lock")),
		root:    hash.Of([]byte("root")),
		gcGen:   hash.Of([]byte("gcgen")),
		specs:   nil,
	}
	var buf bytes.Buffer
	require.NoError(t, writeManifest(&buf, mc))

	// Seed a "table-like" blob to exercise the same access patterns NBS uses:
	// - tail reads via negative BlobRange offsets
	// - ReadAt-style ranged reads (ReadAtWithStats)
	// The contents are a deterministic byte pattern (i mod 251).
	table := make([]byte, 64*1024)
	for i := range table {
		table[i] = byte(i % 251)
	}

	// Commit both blobs and point refs/dolt/data at the resulting commit.
	commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{
		"manifest": buf.Bytes(),
		"table":    table,
	}, "seed refs/dolt/data for smoke test")
	require.NoError(t, err)
	require.NotEmpty(t, commit)

	bs, err := blobstore.NewGitBlobstore(repo.GitDir, "refs/dolt/data")
	require.NoError(t, err)

	// 1) Manifest read path via blobstoreManifest.ParseIfExists.
	stats := NewStats()
	exists, got, err := blobstoreManifest{bs: bs}.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)
	require.True(t, exists)
	require.Equal(t, mc.nbfVers, got.nbfVers)
	require.Equal(t, mc.root, got.root)
	require.Equal(t, mc.lock, got.lock)
	require.Equal(t, mc.gcGen, got.gcGen)
	require.Len(t, got.specs, 0)

	// 2) Tail-read pattern used by table index/footer loads:
	// bs.Get(key, NewBlobRange(-N, 0)) and io.ReadFull.
	// The reported size must be the full blob size and the version must be the
	// seeded commit.
	const tailN = 1024
	rc, totalSz, ver, err := bs.Get(ctx, "table", blobstore.NewBlobRange(-tailN, 0))
	require.NoError(t, err)
	require.Equal(t, uint64(len(table)), totalSz)
	require.Equal(t, commit, ver)
	tail := make([]byte, tailN)
	_, err = io.ReadFull(rc, tail)
	require.NoError(t, err)
	require.NoError(t, rc.Close())
	require.Equal(t, table[len(table)-tailN:], tail)

	// 3) ReadAt-style ranged reads used by table readers.
	tr := &bsTableReaderAt{bs: bs, key: "table"}
	out := make([]byte, 4096)
	n, err := tr.ReadAtWithStats(ctx, out, 1234, stats)
	require.NoError(t, err)
	require.Equal(t, len(out), n)
	require.Equal(t, table[1234:1234+int64(len(out))], out)

	// Near-end reads should return short read without error.
	// Only 100 bytes remain after |start|, so n must be 100.
	out2 := make([]byte, 4096)
	start := int64(len(table) - 100)
	n, err = tr.ReadAtWithStats(ctx, out2, start, stats)
	require.NoError(t, err)
	require.Equal(t, 100, n)
	require.Equal(t, table[start:], out2[:n])
}
|
||||
@@ -0,0 +1,211 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gitrepo contains test helpers for creating and manipulating git repositories
|
||||
// using plumbing commands without requiring a working tree checkout.
|
||||
//
|
||||
// This package is intended for tests of GitBlobstore and related read paths. It
|
||||
// deliberately uses the git CLI (not a Go git library) to keep the harness small
|
||||
// and to match how the initial GitBlobstore implementation interacts with git.
|
||||
package gitrepo
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Repo is a test-only handle to a bare git repository (its directory is the GIT_DIR).
type Repo struct {
	// GitDir is the path to the bare repository directory. Methods on Repo pass
	// it to git through the GIT_DIR environment variable.
	GitDir string
}
|
||||
|
||||
// InitBare initializes a new bare git repository at |dir|.
|
||||
// For portability across git versions, callers should generally pass a path that
|
||||
// does not exist yet.
|
||||
func InitBare(ctx context.Context, dir string) (*Repo, error) {
|
||||
if err := runGit(ctx, "", "", "", "init", "--bare", dir); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Repo{GitDir: dir}, nil
|
||||
}
|
||||
|
||||
// InitBareTemp creates and initializes a new bare git repository under |parentDir|
|
||||
// (or os.TempDir if empty).
|
||||
func InitBareTemp(ctx context.Context, parentDir string) (*Repo, error) {
|
||||
if parentDir == "" {
|
||||
parentDir = os.TempDir()
|
||||
}
|
||||
dir, err := os.MkdirTemp(parentDir, "gitrepo-bare-")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// git init --bare expects the target directory to not exist in some versions;
|
||||
// to avoid that, create a child directory.
|
||||
bareDir := filepath.Join(dir, "repo.git")
|
||||
return InitBare(ctx, bareDir)
|
||||
}
|
||||
|
||||
// SetRefToTree writes a commit whose tree contains |files| and updates |ref| to point at it.
|
||||
// This is done without a working tree checkout using a temporary index (GIT_INDEX_FILE).
|
||||
//
|
||||
// - |ref| example: "refs/dolt/data"
|
||||
// - |files| keys are tree paths (e.g. "manifest", "a/b/c")
|
||||
// - |message| becomes the commit message (defaults to "test commit" if empty)
|
||||
func (r *Repo) SetRefToTree(ctx context.Context, ref string, files map[string][]byte, message string) (commitOID string, err error) {
|
||||
if message == "" {
|
||||
message = "test commit"
|
||||
}
|
||||
|
||||
indexDir, err := os.MkdirTemp("", "gitrepo-index-")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer func() {
|
||||
_ = os.RemoveAll(indexDir)
|
||||
}()
|
||||
|
||||
indexFile := filepath.Join(indexDir, "index")
|
||||
|
||||
// Empty index.
|
||||
if err := runGit(ctx, r.GitDir, indexFile, "", "read-tree", "--empty"); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Add paths. Sort for determinism.
|
||||
paths := make([]string, 0, len(files))
|
||||
for p := range files {
|
||||
paths = append(paths, p)
|
||||
}
|
||||
sort.Strings(paths)
|
||||
|
||||
for _, p := range paths {
|
||||
oid, err := hashObject(ctx, r.GitDir, files[p])
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := runGit(ctx, r.GitDir, indexFile, "", "update-index", "--add", "--cacheinfo", "100644", oid, p); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
treeOID, err := outputGit(ctx, r.GitDir, indexFile, nil, "write-tree")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
treeOID = strings.TrimSpace(treeOID)
|
||||
if treeOID == "" {
|
||||
return "", fmt.Errorf("write-tree returned empty oid")
|
||||
}
|
||||
|
||||
commitOID, err = outputGit(ctx, r.GitDir, "", commitEnv(), "commit-tree", treeOID, "-m", message)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
commitOID = strings.TrimSpace(commitOID)
|
||||
if commitOID == "" {
|
||||
return "", fmt.Errorf("commit-tree returned empty oid")
|
||||
}
|
||||
|
||||
if err := runGit(ctx, r.GitDir, "", "", "update-ref", ref, commitOID); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return commitOID, nil
|
||||
}
|
||||
|
||||
// commitEnv returns the environment entries that pin the author and committer
// identity used for test commits, so commits do not depend on the host's git
// configuration for a name and email.
func commitEnv() []string {
	const (
		name  = "gitrepo test"
		email = "gitrepo@test.invalid"
	)
	return []string{
		"GIT_AUTHOR_NAME=" + name,
		"GIT_AUTHOR_EMAIL=" + email,
		"GIT_COMMITTER_NAME=" + name,
		"GIT_COMMITTER_EMAIL=" + email,
	}
}
|
||||
|
||||
func hashObject(ctx context.Context, gitDir string, data []byte) (string, error) {
|
||||
out, err := outputGitWithStdin(ctx, gitDir, "", "", bytes.NewReader(data), "hash-object", "-w", "--stdin")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
oid := strings.TrimSpace(out)
|
||||
if oid == "" {
|
||||
return "", fmt.Errorf("hash-object returned empty oid")
|
||||
}
|
||||
return oid, nil
|
||||
}
|
||||
|
||||
func runGit(ctx context.Context, gitDir, indexFile string, extraEnv string, args ...string) error {
|
||||
_, err := outputGit(ctx, gitDir, indexFile, splitEnv(extraEnv), args...)
|
||||
return err
|
||||
}
|
||||
|
||||
func outputGit(ctx context.Context, gitDir, indexFile string, extraEnv []string, args ...string) (string, error) {
|
||||
cmd := exec.CommandContext(ctx, "git", args...) //nolint:gosec // test harness invokes git with controlled args.
|
||||
cmd.Env = envForGit(gitDir, indexFile, extraEnv)
|
||||
var buf bytes.Buffer
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
if err := cmd.Run(); err != nil {
|
||||
return "", fmt.Errorf("%w\ncommand: %s\noutput:\n%s", err, cmd.String(), strings.TrimRight(buf.String(), "\n"))
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
func outputGitWithStdin(ctx context.Context, gitDir, indexFile string, extraEnv string, stdin *bytes.Reader, args ...string) (string, error) {
|
||||
cmd := exec.CommandContext(ctx, "git", args...) //nolint:gosec // test harness invokes git with controlled args.
|
||||
cmd.Env = envForGit(gitDir, indexFile, splitEnv(extraEnv))
|
||||
cmd.Stdin = stdin
|
||||
var buf bytes.Buffer
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
if err := cmd.Run(); err != nil {
|
||||
return "", fmt.Errorf("%w\ncommand: %s\noutput:\n%s", err, cmd.String(), strings.TrimRight(buf.String(), "\n"))
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
// envForGit builds the environment for a git subprocess: a copy of the current
// process environment, followed by GIT_DIR and GIT_INDEX_FILE (each only when
// the corresponding argument is non-empty), followed by |extra|.
func envForGit(gitDir, indexFile string, extra []string) []string {
	env := append([]string(nil), os.Environ()...)
	overrides := [...]struct{ key, value string }{
		{"GIT_DIR", gitDir},
		{"GIT_INDEX_FILE", indexFile},
	}
	for _, o := range overrides {
		if o.value == "" {
			continue
		}
		env = append(env, o.key+"="+o.value)
	}
	return append(env, extra...)
}
|
||||
|
||||
// splitEnv turns a newline-separated "K=V\nK2=V2" string into a slice of
// entries. Each line is trimmed of surrounding whitespace and blank lines are
// dropped. An empty input yields nil.
func splitEnv(extraEnv string) []string {
	if extraEnv == "" {
		return nil
	}
	raw := strings.Split(extraEnv, "\n")
	kept := make([]string, 0, len(raw))
	for _, line := range raw {
		if entry := strings.TrimSpace(line); entry != "" {
			kept = append(kept, entry)
		}
	}
	return kept
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright 2026 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package gitrepo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestInitBareAndSetRefToTree initializes a bare repository, seeds
// refs/dolt/data with a small tree that includes nested paths, and then checks
// with "git cat-file -e" that the "manifest" path exists in the returned commit.
// The test is skipped when git is not on PATH.
func TestInitBareAndSetRefToTree(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skip("git not found on PATH")
	}

	ctx := context.Background()
	root := t.TempDir()
	// Use a child of the temp dir so the path passed to InitBare does not exist yet.
	bareDir := filepath.Join(root, "repo.git")

	repo, err := InitBare(ctx, bareDir)
	if err != nil {
		t.Fatalf("InitBare failed: %v", err)
	}

	// Seed top-level and nested paths in a single commit.
	commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{
		"manifest":   []byte("hello\n"),
		"dir/file":   []byte("abc"),
		"dir/file2":  []byte("def"),
		"dir2/x.txt": []byte("xyz"),
	}, "seed refs/dolt/data")
	if err != nil {
		t.Fatalf("SetRefToTree failed: %v", err)
	}
	if len(strings.TrimSpace(commit)) == 0 {
		t.Fatalf("expected non-empty commit oid")
	}

	// Validate the path exists in the commit.
	cmd := exec.CommandContext(ctx, "git", "--git-dir", repo.GitDir, "cat-file", "-e", commit+":manifest") //nolint:gosec
	if out, err := cmd.CombinedOutput(); err != nil {
		t.Fatalf("cat-file -e failed: %v\n%s", err, string(out))
	}
}
|
||||
@@ -10,7 +10,7 @@ paths=`find . -maxdepth 1 -mindepth 1 \( -type d -print -o -type f -name '*.go'
|
||||
goimports -w -local github.com/dolthub/dolt,github.com/dolthub/eventsapi_schema $paths
|
||||
|
||||
bad_files=$(find $paths -name '*.go' | while read f; do
|
||||
if [[ $(awk '/import \(/{flag=1;next}/\)/{flag=0}flag' < $f | egrep -c '$^') -gt 2 ]]; then
|
||||
if [[ $(awk '/import \(/{flag=1;next}/\)/{flag=0}flag' < $f | grep -Ec '$^') -gt 2 ]]; then
|
||||
echo $f
|
||||
fi
|
||||
done)
|
||||
|
||||
Reference in New Issue
Block a user