mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-30 03:26:47 -05:00
Merge pull request #9850 from dolthub/macneale4-claude/archive-debug
[no-release-notes] add `dolt admin archive-inspect` cmd
This commit is contained in:
@@ -26,5 +26,6 @@ var Commands = cli.NewHiddenSubCommandHandler("admin", "Commands for directly wo
|
||||
StorageCmd{},
|
||||
NewGenToOldGenCmd{},
|
||||
ConjoinCmd{},
|
||||
ArchiveInspectCmd{},
|
||||
createchunk.Commands,
|
||||
})
|
||||
|
||||
@@ -0,0 +1,270 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package admin
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/dolt/go/cmd/dolt/cli"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/argparser"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/nbs"
|
||||
)
|
||||
|
||||
// ArchiveInspectCmd implements the `archive-inspect` admin subcommand. The
// type carries no state; all behavior lives in its methods.
type ArchiveInspectCmd struct{}

// Name returns the token used to invoke this subcommand.
func (ArchiveInspectCmd) Name() string {
	const name = "archive-inspect"
	return name
}

// Description returns a one-sentence summary of the subcommand.
func (ArchiveInspectCmd) Description() string {
	const summary = "Inspect a Dolt archive (.darc) file and display basic information about it."
	return summary
}

// RequiresRepo always reports false.
func (ArchiveInspectCmd) RequiresRepo() bool {
	return false
}
|
||||
|
||||
func (cmd ArchiveInspectCmd) Docs() *cli.CommandDocumentation {
|
||||
return &cli.CommandDocumentation{
|
||||
ShortDesc: "Inspect a Dolt archive (.darc) file and display information about it",
|
||||
LongDesc: `Inspects a Dolt archive (.darc) file and displays detailed information about its structure, contents, and metadata.
|
||||
|
||||
Archive files are compressed collections of chunks used by Dolt for storage. This command provides debugging and inspection capabilities for these files.
|
||||
|
||||
This command takes a path to an archive file, and ignores any database information that would otherwise be provided. To skip wasting time, run this command outside of a Dolt repository.'
|
||||
|
||||
Basic usage displays archive metadata, structure information, and statistics. Advanced usage allows inspection of specific chunks by object ID or raw index positions.`,
|
||||
Synopsis: []string{
|
||||
"[--mmap] <archive-path>",
|
||||
"[--mmap] --object-id <hash> <archive-path>",
|
||||
"[--mmap] --inspect-index <index> <archive-path>",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (cmd ArchiveInspectCmd) ArgParser() *argparser.ArgParser {
|
||||
ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 1)
|
||||
ap.SupportsString("archive-path", "", "archive_path", "Path to the archive file (.darc) to inspect")
|
||||
ap.SupportsFlag("mmap", "", "Enable memory-mapped index reading. Default is to load index into memory.")
|
||||
ap.SupportsString("object-id", "", "object_id", "Base32-encoded 20-byte object ID to inspect within the archive")
|
||||
ap.SupportsString("inspect-index", "", "index", "Inspect raw index reader data at specific index position")
|
||||
return ap
|
||||
}
|
||||
|
||||
func (cmd ArchiveInspectCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv, cliCtx cli.CliContext) int {
|
||||
ap := cmd.ArgParser()
|
||||
usage, _ := cli.HelpAndUsagePrinters(cli.CommandDocsForCommandString(commandStr, cli.CommandDocumentationContent{}, ap))
|
||||
apr := cli.ParseArgsOrDie(ap, args, usage)
|
||||
|
||||
var archivePath string
|
||||
if archivePathArg, ok := apr.GetValue("archive-path"); ok {
|
||||
archivePath = archivePathArg
|
||||
} else if apr.NArg() == 1 {
|
||||
archivePath = apr.Arg(0)
|
||||
} else {
|
||||
usage()
|
||||
return 1
|
||||
}
|
||||
|
||||
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||
cli.PrintErrln("Error: Archive file does not exist:", archivePath)
|
||||
return 1
|
||||
}
|
||||
if !strings.HasSuffix(strings.ToLower(archivePath), nbs.ArchiveFileSuffix) {
|
||||
cli.PrintErrln("Warning: File does not have .darc extension")
|
||||
}
|
||||
|
||||
absPath, err := filepath.Abs(archivePath)
|
||||
if err != nil {
|
||||
cli.PrintErrln("Error getting absolute path:", err.Error())
|
||||
return 1
|
||||
}
|
||||
|
||||
enableMmap := apr.Contains("mmap")
|
||||
inspector, err := nbs.NewArchiveInspectorFromFileWithMmap(ctx, absPath, enableMmap)
|
||||
if err != nil {
|
||||
cli.PrintErrln("Error opening archive file:", err.Error())
|
||||
return 1
|
||||
}
|
||||
defer inspector.Close()
|
||||
|
||||
cli.Println("Archive file:", absPath)
|
||||
cli.Printf("File size: %d bytes\n", inspector.FileSize())
|
||||
cli.Printf("Format version: %d\n", inspector.FormatVersion())
|
||||
cli.Printf("File signature: %s\n", inspector.FileSignature())
|
||||
cli.Println()
|
||||
|
||||
cli.Printf("Chunk count: %d\n", inspector.ChunkCount())
|
||||
cli.Printf("Byte span count: %d\n", inspector.ByteSpanCount())
|
||||
cli.Printf("Index size: %d bytes\n", inspector.IndexSize())
|
||||
cli.Printf("Metadata size: %d bytes\n", inspector.MetadataSize())
|
||||
|
||||
// Display metadata if present
|
||||
if inspector.MetadataSize() > 0 {
|
||||
cli.Println()
|
||||
cli.Println("Metadata:")
|
||||
metadataBytes, err := inspector.GetMetadata(ctx)
|
||||
if err != nil {
|
||||
cli.PrintErrln("Error reading metadata:", err.Error())
|
||||
} else {
|
||||
// Try to parse as JSON and pretty print
|
||||
var metadataObj interface{}
|
||||
if err := json.Unmarshal(metadataBytes, &metadataObj); err == nil {
|
||||
prettyJSON, _ := json.MarshalIndent(metadataObj, " ", " ")
|
||||
cli.Printf(" %s\n", string(prettyJSON))
|
||||
} else {
|
||||
// If not JSON, just print as ascii. To date we don't have any non-JSON metadata.
|
||||
cli.Printf(" %s\n", string(metadataBytes))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cli.Println("Metadata: none")
|
||||
}
|
||||
|
||||
if objectIdStr, ok := apr.GetValue("object-id"); ok {
|
||||
cli.Println()
|
||||
cli.Println("Object inspection:")
|
||||
|
||||
objectHash, hashOk := hash.MaybeParse(objectIdStr)
|
||||
if !hashOk {
|
||||
cli.PrintErrln("Error: Invalid object ID format. Expected 32-character base32 encoded hash.")
|
||||
return 1
|
||||
}
|
||||
|
||||
debugInfo := inspector.SearchChunkDebug(objectHash)
|
||||
|
||||
cli.Printf("Hash: %s\n", debugInfo.Hash)
|
||||
cli.Printf("Prefix: 0x%x\n", debugInfo.Prefix)
|
||||
cli.Printf("Suffix: 0x%x\n", debugInfo.Suffix)
|
||||
cli.Printf("Index reader type: %s\n", debugInfo.IndexReaderType)
|
||||
cli.Printf("Chunk count: %d\n", debugInfo.ChunkCount)
|
||||
cli.Printf("Possible match index: %d\n", debugInfo.PossibleMatch)
|
||||
cli.Printf("Valid range: %t\n", debugInfo.ValidRange)
|
||||
cli.Printf("Final search result: %d\n", debugInfo.FinalResult)
|
||||
|
||||
cli.Printf("Prefix matches found: %d\n", len(debugInfo.Matches))
|
||||
for i, match := range debugInfo.Matches {
|
||||
cli.Printf(" Match %d: index=%d, suffixMatch=%t, suffix=0x%x\n",
|
||||
i, match.Index, match.SuffixMatch, match.SuffixAtIdx)
|
||||
}
|
||||
cli.Println()
|
||||
|
||||
// Look up the object in the archive
|
||||
chunkInfo, err := inspector.GetChunkInfo(ctx, objectHash)
|
||||
if err != nil {
|
||||
cli.PrintErrln("Error inspecting object:", err.Error())
|
||||
return 1
|
||||
}
|
||||
|
||||
if chunkInfo == nil {
|
||||
cli.Printf("Object %s not found in archive\n", objectIdStr)
|
||||
} else {
|
||||
cli.Printf("Compression type: %s\n", chunkInfo.CompressionType)
|
||||
cli.Printf("Dictionary byte span ID: %d\n", chunkInfo.DictionaryID)
|
||||
cli.Printf("Data byte span ID: %d\n", chunkInfo.DataID)
|
||||
|
||||
if chunkInfo.DictionaryByteSpan.Length > 0 {
|
||||
cli.Printf("Dictionary byte span: offset=%d, length=%d\n",
|
||||
chunkInfo.DictionaryByteSpan.Offset, chunkInfo.DictionaryByteSpan.Length)
|
||||
} else {
|
||||
cli.Println("Dictionary byte span: none (empty)")
|
||||
}
|
||||
|
||||
cli.Printf("Data byte span: offset=%d, length=%d\n",
|
||||
chunkInfo.DataByteSpan.Offset, chunkInfo.DataByteSpan.Length)
|
||||
}
|
||||
}
|
||||
|
||||
// Handle inspect-index if provided
|
||||
if indexStr, ok := apr.GetValue("inspect-index"); ok {
|
||||
cli.Println()
|
||||
cli.Println("Index inspection:")
|
||||
|
||||
// Parse the index
|
||||
indexVal, err := strconv.ParseUint(indexStr, 10, 32)
|
||||
if err != nil {
|
||||
cli.PrintErrln("Error: Invalid index format. Expected unsigned integer.")
|
||||
return 1
|
||||
}
|
||||
|
||||
idx := uint32(indexVal)
|
||||
details := inspector.GetIndexReaderDetails(idx)
|
||||
|
||||
// Print all details
|
||||
cli.Printf("Index: %d\n", details.RequestedIndex)
|
||||
cli.Printf("Index reader type: %s\n", details.IndexReaderType)
|
||||
cli.Printf("Chunk count: %d\n", details.ChunkCount)
|
||||
cli.Printf("Byte span count: %d\n", details.ByteSpanCount)
|
||||
|
||||
if details.Error != "" {
|
||||
cli.Printf("Error: %s\n", details.Error)
|
||||
return 1
|
||||
}
|
||||
|
||||
cli.Printf("Hash: %s\n", details.Hash)
|
||||
cli.Printf("Prefix: 0x%x\n", details.Prefix)
|
||||
cli.Printf("Suffix: 0x%x\n", details.Suffix)
|
||||
cli.Printf("Dictionary ID: %d\n", details.DictionaryID)
|
||||
cli.Printf("Data ID: %d\n", details.DataID)
|
||||
|
||||
// Show implementation-specific details
|
||||
cli.Println()
|
||||
cli.Println("Implementation details:")
|
||||
|
||||
// Show common calculation details first
|
||||
if details.ExpectedSuffixStart != 0 || details.ExpectedSuffixEnd != 0 {
|
||||
cli.Printf("Expected suffix start: %d\n", details.ExpectedSuffixStart)
|
||||
cli.Printf("Expected suffix end: %d\n", details.ExpectedSuffixEnd)
|
||||
}
|
||||
|
||||
// Show in-memory specific details
|
||||
if details.PrefixArrayLength > 0 {
|
||||
cli.Printf("Storage type: In-memory arrays\n")
|
||||
cli.Printf("Prefix array length: %d\n", details.PrefixArrayLength)
|
||||
cli.Printf("Suffix array length: %d\n", details.SuffixArrayLength)
|
||||
cli.Printf("Chunk ref array length: %d\n", details.ChunkRefArrayLength)
|
||||
cli.Printf("Span index array length: %d\n", details.SpanIndexArrayLength)
|
||||
cli.Printf("Suffix array bounds valid: %t\n", details.SuffixArrayBounds)
|
||||
}
|
||||
|
||||
// Show mmap specific details
|
||||
if details.MmapIndexSize > 0 {
|
||||
cli.Printf("Storage type: Memory-mapped file\n")
|
||||
cli.Printf("Span index offset: %d\n", details.SpanIndexOffset)
|
||||
cli.Printf("Prefixes offset: %d\n", details.PrefixesOffset)
|
||||
cli.Printf("Chunk refs offset: %d\n", details.ChunkRefsOffset)
|
||||
cli.Printf("Suffixes offset: %d\n", details.SuffixesOffset)
|
||||
cli.Printf("Actual suffix file offset: %d\n", details.ActualSuffixOffset)
|
||||
}
|
||||
|
||||
// Show raw suffix bytes for both implementations
|
||||
if len(details.RawSuffixBytes) > 0 {
|
||||
cli.Printf("Raw suffix bytes: %x\n", details.RawSuffixBytes)
|
||||
}
|
||||
if details.RawSuffixBytesError != "" {
|
||||
cli.Printf("Raw suffix bytes error: %s\n", details.RawSuffixBytesError)
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
@@ -0,0 +1,348 @@
|
||||
// Copyright 2025 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
|
||||
// ByteSpanInfo reports the offset and length of one byte span in the archive.
type ByteSpanInfo struct {
	Offset, Length uint64
}

// ChunkInfo is the result of ArchiveInspector.GetChunkInfo for a single chunk.
type ChunkInfo struct {
	// CompressionType is a human-readable label derived from the dictionary
	// ID and the archive format version.
	CompressionType string
	// DictionaryID and DataID are the byte span IDs from the chunk's index
	// entry; a DictionaryID of 0 means no dictionary.
	DictionaryID, DataID uint32
	// DictionaryByteSpan and DataByteSpan are the byte spans those IDs resolve to.
	DictionaryByteSpan, DataByteSpan ByteSpanInfo
}
|
||||
|
||||
// PrefixMatch describes one index entry whose prefix equals the prefix being
// searched for.
type PrefixMatch struct {
	// Index is the entry's position in the archive index.
	Index uint32
	// SuffixAtIdx is the suffix stored at that position.
	SuffixAtIdx []byte
	// SuffixMatch is true when SuffixAtIdx equals the suffix of the searched hash.
	SuffixMatch bool
}

// SearchDebugInfo is the trace produced by ArchiveInspector.SearchChunkDebug
// for a single hash lookup.
type SearchDebugInfo struct {
	// Hash, Prefix and Suffix identify the hash that was searched for.
	Hash   string
	Prefix uint64
	Suffix []byte
	// PossibleMatch is the position returned by the index reader's prefix search.
	PossibleMatch int32
	// ChunkCount is the chunk count recorded in the archive footer.
	ChunkCount uint32
	// IndexReaderType is the Go type name of the index reader in use.
	IndexReaderType string
	// ValidRange is true when PossibleMatch lies within [0, ChunkCount).
	ValidRange bool
	// FinalResult is the index of the entry matching both prefix and suffix,
	// or -1 when there is none.
	FinalResult int
	// Matches lists the prefix-matching entries that were examined.
	Matches []PrefixMatch
}
|
||||
|
||||
// IndexReaderDetails is the report produced by
// ArchiveInspector.GetIndexReaderDetails for one index position. Fields that
// belong to a reader implementation other than the one in use keep their zero
// value.
type IndexReaderDetails struct {
	// Always populated.
	IndexReaderType                           string
	RequestedIndex, ChunkCount, ByteSpanCount uint32

	// Error is non-empty when the requested index could not be inspected; the
	// fields below it are then left unset.
	Error, Hash          string
	Prefix               uint64
	Suffix               []byte
	DictionaryID, DataID uint32

	// Lengths of the in-memory reader's arrays.
	PrefixArrayLength, SuffixArrayLength, ChunkRefArrayLength, SpanIndexArrayLength int

	// Position of the entry's suffix within the suffix region, and the raw
	// bytes found there. SuffixArrayBounds is set by the in-memory reader only.
	ExpectedSuffixStart, ExpectedSuffixEnd uint32
	SuffixArrayBounds                      bool
	RawSuffixBytes                         []byte

	// Layout of the memory-mapped reader's index.
	MmapIndexSize                                                                        uint64
	MmapByteSpanCount, MmapChunkCount                                                    uint32
	SpanIndexOffset, PrefixesOffset, ChunkRefsOffset, SuffixesOffset, ActualSuffixOffset uint64

	// RawSuffixBytesError holds the read error when the memory-mapped reader
	// could not fetch RawSuffixBytes.
	RawSuffixBytesError string
}
|
||||
|
||||
// ArchiveInspector provides a way to inspect archive files from outside the nbs package. Intended for debugging and inspection,
|
||||
// currently only used by the `dolt admin archive-inspect` command.
|
||||
type ArchiveInspector struct {
|
||||
reader archiveReader
|
||||
}
|
||||
|
||||
// NewArchiveInspectorFromFileWithMmap creates an ArchiveInspector from a file path with configurable mmap
|
||||
func NewArchiveInspectorFromFileWithMmap(ctx context.Context, archivePath string, enableMmap bool) (*ArchiveInspector, error) {
|
||||
fra, err := newFileReaderAt(archivePath, enableMmap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Use a dummy hash since it's not needed when we have the file reader already.
|
||||
dummyHash := hash.Hash{}
|
||||
stats := &Stats{}
|
||||
|
||||
archiveReader, err := newArchiveReader(ctx, fra, dummyHash, uint64(fra.sz), stats)
|
||||
if err != nil {
|
||||
fra.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &ArchiveInspector{reader: archiveReader}, nil
|
||||
}
|
||||
|
||||
// Close releases resources associated with the archive inspector
|
||||
func (ai *ArchiveInspector) Close() error {
|
||||
return ai.reader.close()
|
||||
}
|
||||
|
||||
// ChunkCount returns the number of chunks in the archive
|
||||
func (ai *ArchiveInspector) ChunkCount() uint32 {
|
||||
return ai.reader.count()
|
||||
}
|
||||
|
||||
// FormatVersion returns the format version of the archive
|
||||
func (ai *ArchiveInspector) FormatVersion() uint8 {
|
||||
return ai.reader.footer.formatVersion
|
||||
}
|
||||
|
||||
// FileSignature returns the file signature of the archive
|
||||
func (ai *ArchiveInspector) FileSignature() string {
|
||||
return ai.reader.footer.fileSignature
|
||||
}
|
||||
|
||||
// IndexSize returns the size of the index section in bytes
|
||||
func (ai *ArchiveInspector) IndexSize() uint64 {
|
||||
return ai.reader.footer.indexSize
|
||||
}
|
||||
|
||||
// MetadataSize returns the size of the metadata section in bytes
|
||||
func (ai *ArchiveInspector) MetadataSize() uint32 {
|
||||
return ai.reader.footer.metadataSize
|
||||
}
|
||||
|
||||
// FileSize returns the total size of the archive file
|
||||
func (ai *ArchiveInspector) FileSize() uint64 {
|
||||
return ai.reader.footer.fileSize
|
||||
}
|
||||
|
||||
// ByteSpanCount returns the number of byte spans in the archive
|
||||
func (ai *ArchiveInspector) ByteSpanCount() uint32 {
|
||||
return ai.reader.footer.byteSpanCount
|
||||
}
|
||||
|
||||
// GetMetadata retrieves the metadata from the archive as raw bytes
|
||||
func (ai *ArchiveInspector) GetMetadata(ctx context.Context) ([]byte, error) {
|
||||
stats := &Stats{}
|
||||
return ai.reader.getMetadata(ctx, stats)
|
||||
}
|
||||
|
||||
// SearchChunk exposes the underlying search method for debugging
|
||||
func (ai *ArchiveInspector) SearchChunk(h hash.Hash) int {
|
||||
return ai.reader.search(h)
|
||||
}
|
||||
|
||||
// SearchChunkDebug exposes detailed search information for debugging
|
||||
func (ai *ArchiveInspector) SearchChunkDebug(h hash.Hash) *SearchDebugInfo {
|
||||
prefix := h.Prefix()
|
||||
possibleMatch := ai.reader.indexReader.searchPrefix(prefix)
|
||||
targetSfx := h.Suffix()
|
||||
|
||||
debug := &SearchDebugInfo{
|
||||
Hash: h.String(),
|
||||
Prefix: prefix,
|
||||
Suffix: targetSfx,
|
||||
PossibleMatch: possibleMatch,
|
||||
ChunkCount: ai.reader.footer.chunkCount,
|
||||
IndexReaderType: fmt.Sprintf("%T", ai.reader.indexReader),
|
||||
}
|
||||
|
||||
// Check if possibleMatch is in valid range
|
||||
if possibleMatch < 0 || uint32(possibleMatch) >= ai.reader.footer.chunkCount {
|
||||
debug.ValidRange = false
|
||||
debug.FinalResult = -1
|
||||
return debug
|
||||
}
|
||||
|
||||
debug.ValidRange = true
|
||||
|
||||
// Check prefix matches in the range
|
||||
matches := []PrefixMatch{}
|
||||
for idx := uint32(possibleMatch); idx < ai.reader.footer.chunkCount && ai.reader.indexReader.getPrefix(idx) == prefix; idx++ {
|
||||
suffixAtIdx := ai.reader.indexReader.getSuffix(idx)
|
||||
match := PrefixMatch{
|
||||
Index: idx,
|
||||
SuffixAtIdx: suffixAtIdx[:],
|
||||
SuffixMatch: suffixAtIdx == suffix(targetSfx),
|
||||
}
|
||||
matches = append(matches, match)
|
||||
|
||||
if suffixAtIdx == suffix(targetSfx) {
|
||||
debug.FinalResult = int(idx)
|
||||
debug.Matches = matches
|
||||
return debug
|
||||
}
|
||||
}
|
||||
|
||||
debug.Matches = matches
|
||||
debug.FinalResult = -1
|
||||
return debug
|
||||
}
|
||||
|
||||
// GetIndexReaderDetails exposes internal index reader state for debugging
|
||||
func (ai *ArchiveInspector) GetIndexReaderDetails(idx uint32) *IndexReaderDetails {
|
||||
details := &IndexReaderDetails{
|
||||
IndexReaderType: fmt.Sprintf("%T", ai.reader.indexReader),
|
||||
RequestedIndex: idx,
|
||||
ChunkCount: ai.reader.footer.chunkCount,
|
||||
ByteSpanCount: ai.reader.footer.byteSpanCount,
|
||||
}
|
||||
|
||||
if idx >= ai.reader.footer.chunkCount {
|
||||
details.Error = "index out of range"
|
||||
return details
|
||||
}
|
||||
|
||||
// Get prefix and suffix
|
||||
prefix := ai.reader.indexReader.getPrefix(idx)
|
||||
suffix := ai.reader.indexReader.getSuffix(idx)
|
||||
|
||||
details.Prefix = prefix
|
||||
details.Suffix = suffix[:]
|
||||
|
||||
// Construct the full hash from prefix and suffix
|
||||
hashBytes := make([]byte, hash.ByteLen)
|
||||
binary.BigEndian.PutUint64(hashBytes[:hash.PrefixLen], prefix)
|
||||
copy(hashBytes[hash.PrefixLen:], suffix[:])
|
||||
reconstructedHash := hash.New(hashBytes)
|
||||
details.Hash = reconstructedHash.String()
|
||||
|
||||
// Get chunk references
|
||||
dictID, dataID := ai.reader.indexReader.getChunkRef(idx)
|
||||
details.DictionaryID = dictID
|
||||
details.DataID = dataID
|
||||
|
||||
// For in-memory reader, expose the raw array details
|
||||
if inMem, ok := ai.reader.indexReader.(*inMemoryArchiveIndexReader); ok {
|
||||
details.PrefixArrayLength = len(inMem.prefixes)
|
||||
details.SuffixArrayLength = len(inMem.suffixes)
|
||||
details.ChunkRefArrayLength = len(inMem.chunkRefs)
|
||||
details.SpanIndexArrayLength = len(inMem.spanIndex)
|
||||
|
||||
// Calculate expected suffix position
|
||||
expectedSuffixStart := idx * hash.SuffixLen
|
||||
details.ExpectedSuffixStart = expectedSuffixStart
|
||||
details.ExpectedSuffixEnd = expectedSuffixStart + hash.SuffixLen
|
||||
details.SuffixArrayBounds = expectedSuffixStart+hash.SuffixLen <= uint32(len(inMem.suffixes))
|
||||
|
||||
// Show raw bytes around the suffix position for debugging
|
||||
if expectedSuffixStart < uint32(len(inMem.suffixes)) {
|
||||
end := expectedSuffixStart + hash.SuffixLen
|
||||
if end > uint32(len(inMem.suffixes)) {
|
||||
end = uint32(len(inMem.suffixes))
|
||||
}
|
||||
details.RawSuffixBytes = inMem.suffixes[expectedSuffixStart:end]
|
||||
}
|
||||
}
|
||||
|
||||
// For mmap reader, expose similar details
|
||||
if mmapReader, isMmap := ai.reader.indexReader.(*mmapIndexReader); isMmap {
|
||||
details.MmapIndexSize = mmapReader.indexSize
|
||||
details.MmapByteSpanCount = mmapReader.byteSpanCount
|
||||
details.MmapChunkCount = mmapReader.chunkCount
|
||||
details.SpanIndexOffset = mmapReader.spanIndexOffset
|
||||
details.PrefixesOffset = mmapReader.prefixesOffset
|
||||
details.ChunkRefsOffset = mmapReader.chunkRefsOffset
|
||||
details.SuffixesOffset = mmapReader.suffixesOffset
|
||||
|
||||
// Calculate expected suffix position in mmap
|
||||
expectedSuffixStart := uint64(idx) * hash.SuffixLen
|
||||
actualSuffixOffset := mmapReader.suffixesOffset + expectedSuffixStart
|
||||
details.ExpectedSuffixStart = uint32(expectedSuffixStart)
|
||||
details.ExpectedSuffixEnd = uint32(expectedSuffixStart + hash.SuffixLen)
|
||||
details.ActualSuffixOffset = actualSuffixOffset
|
||||
|
||||
// Try to read raw bytes around the suffix position
|
||||
if mmapReader.data != nil {
|
||||
rawBytes := make([]byte, hash.SuffixLen)
|
||||
_, err := mmapReader.data.ReadAt(rawBytes, int64(actualSuffixOffset))
|
||||
if err == nil {
|
||||
details.RawSuffixBytes = rawBytes
|
||||
} else {
|
||||
details.RawSuffixBytesError = err.Error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return details
|
||||
}
|
||||
|
||||
// GetChunkInfo looks up information about a specific chunk in the archive
|
||||
func (ai *ArchiveInspector) GetChunkInfo(ctx context.Context, h hash.Hash) (*ChunkInfo, error) {
|
||||
idx := ai.reader.search(h)
|
||||
if idx < 0 {
|
||||
return nil, fmt.Errorf("chunk %s not found", h.String())
|
||||
}
|
||||
|
||||
// Get the chunk reference (dictionary ID and data ID)
|
||||
dictID, dataID := ai.reader.getChunkRef(idx)
|
||||
|
||||
dictByteSpan := ai.reader.getByteSpanByID(dictID)
|
||||
dataByteSpan := ai.reader.getByteSpanByID(dataID)
|
||||
|
||||
compressionType := "unknown"
|
||||
formatVersion := ai.reader.footer.formatVersion
|
||||
|
||||
if dictID == 0 {
|
||||
// Dictionary ID 0 means no dictionary
|
||||
if formatVersion == 1 {
|
||||
compressionType = "zstd (no dictionary)"
|
||||
} else if formatVersion >= 2 {
|
||||
compressionType = "snappy"
|
||||
}
|
||||
} else {
|
||||
// Dictionary ID > 0 means zstd with dictionary
|
||||
compressionType = "zstd (with dictionary)"
|
||||
}
|
||||
|
||||
return &ChunkInfo{
|
||||
CompressionType: compressionType,
|
||||
DictionaryID: dictID,
|
||||
DataID: dataID,
|
||||
DictionaryByteSpan: ByteSpanInfo{
|
||||
Offset: dictByteSpan.offset,
|
||||
Length: dictByteSpan.length,
|
||||
},
|
||||
DataByteSpan: ByteSpanInfo{
|
||||
Offset: dataByteSpan.offset,
|
||||
Length: dataByteSpan.length,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
@@ -323,7 +323,7 @@ func (f *inMemoryArchiveIndexReader) getChunkRef(idx uint32) (dict, data uint32)
|
||||
}
|
||||
|
||||
func (f *inMemoryArchiveIndexReader) getSuffix(idx uint32) suffix {
|
||||
if idx >= uint32(len(f.suffixes)/hash.SuffixLen) {
|
||||
if idx >= f.getNumChunks() {
|
||||
return suffix{}
|
||||
}
|
||||
start := uint64(idx) * hash.SuffixLen
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
#! /usr/bin/env bats
|
||||
load $BATS_TEST_DIRNAME/helper/common.bash
|
||||
|
||||
ARCHIVE_PATH="$BATS_TEST_DIRNAME/archive-test-repos/v2/noms/oldgen/27avtn2a3upddh52eu750m4709gfps7s.darc"
|
||||
|
||||
# Runs before each test: shared setup without initializing a dolt repository.
setup() {
    setup_no_dolt_init
}
|
||||
|
||||
# Runs after each test: shared cleanup.
teardown() {
    teardown_common
}
|
||||
|
||||
@test "admin-archive-inspect: basic archive inspection" {
|
||||
run dolt admin archive-inspect "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Archive file:" ]] || false
|
||||
[[ "$output" =~ "File size:" ]] || false
|
||||
[[ "$output" =~ "Format version:" ]] || false
|
||||
[[ "$output" =~ "File signature:" ]] || false
|
||||
[[ "$output" =~ "Chunk count:" ]] || false
|
||||
[[ "$output" =~ "Byte span count:" ]] || false
|
||||
[[ "$output" =~ "Index size:" ]] || false
|
||||
[[ "$output" =~ "Metadata size:" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: archive file must exist" {
|
||||
run dolt admin archive-inspect "/nonexistent/file.darc"
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "$output" =~ "Error: Archive file does not exist:" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: mmap flag works" {
|
||||
run dolt admin archive-inspect --mmap "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Archive file:" ]] || false
|
||||
[[ "$output" =~ "Chunk count:" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: object-id inspection with invalid hash" {
|
||||
run dolt admin archive-inspect --object-id "invalid" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "$output" =~ "Error: Invalid object ID format. Expected 32-character base32 encoded hash." ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: object-id inspection with valid hash format but not found" {
|
||||
run dolt admin archive-inspect --object-id "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "$output" =~ "Object inspection:" ]] || false
|
||||
[[ "$output" =~ "Hash:" ]] || false
|
||||
[[ "$output" =~ "Prefix:" ]] || false
|
||||
[[ "$output" =~ "Suffix:" ]] || false
|
||||
[[ "$output" =~ "Error inspecting object:" ]] || false
|
||||
[[ "$output" =~ "not found" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: object-id inspection with existing hash" {
|
||||
# Use the hash we know exists at index 42
|
||||
run dolt admin archive-inspect --object-id "4pguchpitq1bsb09ogaivmcstgsnbd3k" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Object inspection:" ]] || false
|
||||
[[ "$output" =~ "Hash: 4pguchpitq1bsb09ogaivmcstgsnbd3k" ]] || false
|
||||
[[ "$output" =~ "Prefix: 0x2661e64732ee82be" ]] || false
|
||||
[[ "$output" =~ "Suffix: 0x2c09c4152fd99cec3975b474" ]] || false
|
||||
[[ "$output" =~ "Possible match index: 42" ]] || false
|
||||
[[ "$output" =~ "Compression type: zstd (with dictionary" ]] || false
|
||||
[[ "$output" =~ "Dictionary byte span ID: 1" ]] || false
|
||||
[[ "$output" =~ "Data byte span ID: 70" ]] || false
|
||||
[[ "$output" =~ "Dictionary byte span: offset=0, length=296" ]] || false
|
||||
[[ "$output" =~ "Data byte span: offset=20850, length=43" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: object-id inspection with existing hash with mmap" {
|
||||
# Use the hash we know exists at index 42
|
||||
run dolt admin archive-inspect --mmap --object-id "4pguchpitq1bsb09ogaivmcstgsnbd3k" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Object inspection:" ]] || false
|
||||
[[ "$output" =~ "Hash: 4pguchpitq1bsb09ogaivmcstgsnbd3k" ]] || false
|
||||
[[ "$output" =~ "Prefix: 0x2661e64732ee82be" ]] || false
|
||||
[[ "$output" =~ "Suffix: 0x2c09c4152fd99cec3975b474" ]] || false
|
||||
[[ "$output" =~ "Possible match index: 42" ]] || false
|
||||
[[ "$output" =~ "Compression type: zstd (with dictionary" ]] || false
|
||||
[[ "$output" =~ "Dictionary byte span ID: 1" ]] || false
|
||||
[[ "$output" =~ "Data byte span ID: 70" ]] || false
|
||||
[[ "$output" =~ "Dictionary byte span: offset=0, length=296" ]] || false
|
||||
[[ "$output" =~ "Data byte span: offset=20850, length=43" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: inspect-index with invalid index" {
|
||||
run dolt admin archive-inspect --inspect-index "invalid" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "$output" =~ "Error: Invalid index format. Expected unsigned integer." ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: inspect-index with valid index" {
|
||||
run dolt admin archive-inspect --inspect-index "0" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Index inspection:" ]] || false
|
||||
[[ "$output" =~ "Index: 0" ]] || false
|
||||
[[ "$output" =~ "Index reader type: *nbs.inMemoryArchiveIndexReader" ]] || false
|
||||
[[ "$output" =~ "Chunk count: 230" ]] || false
|
||||
[[ "$output" =~ "Byte span count: 231" ]] || false
|
||||
[[ "$output" =~ "Hash: 03fe1b95i4bqpetk2klb46devv1saqmd" ]] || false
|
||||
[[ "$output" =~ "Prefix: 0xdee0ad259117ac" ]] || false
|
||||
[[ "$output" =~ "Suffix: 0xbbb4152ab219aeffc3c56acd" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: inspect-index with valid index and mmap" {
|
||||
run dolt admin archive-inspect --mmap --inspect-index "0" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Index inspection:" ]] || false
|
||||
[[ "$output" =~ "Index: 0" ]] || false
|
||||
[[ "$output" =~ "Index reader type: *nbs.mmapIndexReader" ]] || false
|
||||
[[ "$output" =~ "Chunk count: 230" ]] || false
|
||||
[[ "$output" =~ "Byte span count: 231" ]] || false
|
||||
[[ "$output" =~ "Hash: 03fe1b95i4bqpetk2klb46devv1saqmd" ]] || false
|
||||
[[ "$output" =~ "Prefix: 0xdee0ad259117ac" ]] || false
|
||||
[[ "$output" =~ "Suffix: 0xbbb4152ab219aeffc3c56acd" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: inspect-index with out of range index" {
|
||||
# 230 chunks, so index 231 is out of range
|
||||
run dolt admin archive-inspect --inspect-index "231" "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "$output" =~ "Error: index out of range" ]] || false
|
||||
}
|
||||
|
||||
@test "admin-archive-inspect: mmap and non-mmap produce similar output format" {
|
||||
run dolt admin archive-inspect "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
output_nommap="$output"
|
||||
|
||||
run dolt admin archive-inspect --mmap "$ARCHIVE_PATH"
|
||||
[ "$status" -eq 0 ]
|
||||
output_mmap="$output"
|
||||
|
||||
# Both should have the same basic structure
|
||||
[[ "$output_nommap" =~ "Archive file:" ]] || false
|
||||
[[ "$output_mmap" =~ "Archive file:" ]] || false
|
||||
[[ "$output_nommap" =~ "Chunk count:" ]] || false
|
||||
[[ "$output_mmap" =~ "Chunk count:" ]] || false
|
||||
}
|
||||
@@ -142,6 +142,7 @@ SKIP_SERVER_TESTS=$(cat <<-EOM
|
||||
~import-no-header-csv.bats~
|
||||
~import-no-header-psv.bats~
|
||||
~admin-conjoin.bats~
|
||||
~admin-archive-inspect.bats~
|
||||
EOM
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user