Merge pull request #9850 from dolthub/macneale4-claude/archive-debug

[no-release-notes] add `dolt admin archive-inspect` cmd
2026-04-30 03:26:47 -05:00 · 2025-09-18 09:53:14 -07:00
parent 6529536532 76cf7a5f07
commit 428e459d35
6 changed files with 763 additions and 1 deletions
@@ -26,5 +26,6 @@ var Commands = cli.NewHiddenSubCommandHandler("admin", "Commands for directly wo
 	StorageCmd{},
 	NewGenToOldGenCmd{},
 	ConjoinCmd{},
+	ArchiveInspectCmd{},
 	createchunk.Commands,
 })
@@ -0,0 +1,270 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package admin
+
+import (
+	"context"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/dolthub/dolt/go/cmd/dolt/cli"
+	"github.com/dolthub/dolt/go/libraries/doltcore/env"
+	"github.com/dolthub/dolt/go/libraries/utils/argparser"
+	"github.com/dolthub/dolt/go/store/hash"
+	"github.com/dolthub/dolt/go/store/nbs"
+)
+
+// ArchiveInspectCmd implements the `dolt admin archive-inspect` subcommand.
+// It carries no state; all behavior lives in its methods.
+type ArchiveInspectCmd struct {
+}
+
+// Name returns the subcommand name, "archive-inspect".
+func (cmd ArchiveInspectCmd) Name() string {
+	return "archive-inspect"
+}
+
+// Description returns a one-line summary of what the command does.
+func (cmd ArchiveInspectCmd) Description() string {
+	return "Inspect a Dolt archive (.darc) file and display basic information about it."
+}
+
+// RequiresRepo returns false: the command works on an archive file path and
+// does not need to be run from inside a Dolt repository.
+func (cmd ArchiveInspectCmd) RequiresRepo() bool {
+	return false
+}
+
+// Docs returns the help text for the command: a short description, a long
+// description, and one synopsis line per supported invocation form (plain
+// inspection, --object-id lookup, and --inspect-index lookup).
+func (cmd ArchiveInspectCmd) Docs() *cli.CommandDocumentation {
+	return &cli.CommandDocumentation{
+		ShortDesc: "Inspect a Dolt archive (.darc) file and display information about it",
+		LongDesc: `Inspects a Dolt archive (.darc) file and displays detailed information about its structure, contents, and metadata.
+
+Archive files are compressed collections of chunks used by Dolt for storage. This command provides debugging and inspection capabilities for these files.
+
+This command takes a path to an archive file, and ignores any database information that would otherwise be provided. To skip wasting time, run this command outside of a Dolt repository.
+
+Basic usage displays archive metadata, structure information, and statistics. Advanced usage allows inspection of specific chunks by object ID or raw index positions.`,
+		Synopsis: []string{
+			"[--mmap] <archive-path>",
+			"[--mmap] --object-id <hash> <archive-path>",
+			"[--mmap] --inspect-index <index> <archive-path>",
+		},
+	}
+}
+
+// ArgParser builds the argument parser for the command. It is created with a
+// maximum of one positional argument and registers the --archive-path,
+// --object-id and --inspect-index string options plus the --mmap flag.
+func (cmd ArchiveInspectCmd) ArgParser() *argparser.ArgParser {
+	ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 1)
+	ap.SupportsString("archive-path", "", "archive_path", "Path to the archive file (.darc) to inspect")
+	ap.SupportsFlag("mmap", "", "Enable memory-mapped index reading. Default is to load index into memory.")
+	ap.SupportsString("object-id", "", "object_id", "Base32-encoded 20-byte object ID to inspect within the archive")
+	ap.SupportsString("inspect-index", "", "index", "Inspect raw index reader data at specific index position")
+	return ap
+}
+
+// Exec runs the archive-inspect command.
+//
+// The archive path is taken from the --archive-path option if present,
+// otherwise from the single positional argument; with neither, usage is
+// printed. The file is opened through nbs.NewArchiveInspectorFromFileWithMmap
+// (honoring --mmap) and a summary of the archive is printed, followed by its
+// metadata. If --object-id is given, the search debug information and chunk
+// information for that hash are printed; if --inspect-index is given, the
+// index reader details for that position are printed. Both options may be
+// combined, in which case the object section comes first.
+//
+// dEnv and cliCtx are not used. Returns 0 on success and 1 on any usage or
+// inspection error.
+func (cmd ArchiveInspectCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv, cliCtx cli.CliContext) int {
+	ap := cmd.ArgParser()
+	usage, _ := cli.HelpAndUsagePrinters(cli.CommandDocsForCommandString(commandStr, cli.CommandDocumentationContent{}, ap))
+	apr := cli.ParseArgsOrDie(ap, args, usage)
+
+	var archivePath string
+	if archivePathArg, ok := apr.GetValue("archive-path"); ok {
+		archivePath = archivePathArg
+	} else if apr.NArg() == 1 {
+		archivePath = apr.Arg(0)
+	} else {
+		usage()
+		return 1
+	}
+
+	// Only a not-exist error is reported here; any other Stat failure falls
+	// through and is left to the open call below.
+	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
+		cli.PrintErrln("Error: Archive file does not exist:", archivePath)
+		return 1
+	}
+	// An unexpected extension is only a warning; inspection still proceeds.
+	if !strings.HasSuffix(strings.ToLower(archivePath), nbs.ArchiveFileSuffix) {
+		cli.PrintErrln("Warning: File does not have .darc extension")
+	}
+
+	absPath, err := filepath.Abs(archivePath)
+	if err != nil {
+		cli.PrintErrln("Error getting absolute path:", err.Error())
+		return 1
+	}
+
+	enableMmap := apr.Contains("mmap")
+	inspector, err := nbs.NewArchiveInspectorFromFileWithMmap(ctx, absPath, enableMmap)
+	if err != nil {
+		cli.PrintErrln("Error opening archive file:", err.Error())
+		return 1
+	}
+	defer inspector.Close()
+
+	cli.Println("Archive file:", absPath)
+	cli.Printf("File size: %d bytes\n", inspector.FileSize())
+	cli.Printf("Format version: %d\n", inspector.FormatVersion())
+	cli.Printf("File signature: %s\n", inspector.FileSignature())
+	cli.Println()
+
+	cli.Printf("Chunk count: %d\n", inspector.ChunkCount())
+	cli.Printf("Byte span count: %d\n", inspector.ByteSpanCount())
+	cli.Printf("Index size: %d bytes\n", inspector.IndexSize())
+	cli.Printf("Metadata size: %d bytes\n", inspector.MetadataSize())
+
+	// Display metadata if present
+	if inspector.MetadataSize() > 0 {
+		cli.Println()
+		cli.Println("Metadata:")
+		metadataBytes, err := inspector.GetMetadata(ctx)
+		if err != nil {
+			// A metadata read failure is reported but does not abort the command.
+			cli.PrintErrln("Error reading metadata:", err.Error())
+		} else {
+			// Try to parse as JSON and pretty print
+			var metadataObj interface{}
+			if err := json.Unmarshal(metadataBytes, &metadataObj); err == nil {
+				prettyJSON, _ := json.MarshalIndent(metadataObj, "  ", "  ")
+				cli.Printf("  %s\n", string(prettyJSON))
+			} else {
+				// If not JSON, just print as ascii. To date we don't have any non-JSON metadata.
+				cli.Printf("  %s\n", string(metadataBytes))
+			}
+		}
+	} else {
+		cli.Println("Metadata: none")
+	}
+
+	if objectIdStr, ok := apr.GetValue("object-id"); ok {
+		cli.Println()
+		cli.Println("Object inspection:")
+
+		objectHash, hashOk := hash.MaybeParse(objectIdStr)
+		if !hashOk {
+			cli.PrintErrln("Error: Invalid object ID format. Expected 32-character base32 encoded hash.")
+			return 1
+		}
+
+		// The search trace is printed before the lookup so it is visible even
+		// when the lookup below fails.
+		debugInfo := inspector.SearchChunkDebug(objectHash)
+
+		cli.Printf("Hash: %s\n", debugInfo.Hash)
+		cli.Printf("Prefix: 0x%x\n", debugInfo.Prefix)
+		cli.Printf("Suffix: 0x%x\n", debugInfo.Suffix)
+		cli.Printf("Index reader type: %s\n", debugInfo.IndexReaderType)
+		cli.Printf("Chunk count: %d\n", debugInfo.ChunkCount)
+		cli.Printf("Possible match index: %d\n", debugInfo.PossibleMatch)
+		cli.Printf("Valid range: %t\n", debugInfo.ValidRange)
+		cli.Printf("Final search result: %d\n", debugInfo.FinalResult)
+
+		cli.Printf("Prefix matches found: %d\n", len(debugInfo.Matches))
+		for i, match := range debugInfo.Matches {
+			cli.Printf("  Match %d: index=%d, suffixMatch=%t, suffix=0x%x\n",
+				i, match.Index, match.SuffixMatch, match.SuffixAtIdx)
+		}
+		cli.Println()
+
+		// Look up the object in the archive
+		chunkInfo, err := inspector.GetChunkInfo(ctx, objectHash)
+		if err != nil {
+			cli.PrintErrln("Error inspecting object:", err.Error())
+			return 1
+		}
+
+		// NOTE(review): GetChunkInfo reports a missing chunk through a non-nil
+		// error, so this nil branch looks unreachable — confirm.
+		if chunkInfo == nil {
+			cli.Printf("Object %s not found in archive\n", objectIdStr)
+		} else {
+			cli.Printf("Compression type: %s\n", chunkInfo.CompressionType)
+			cli.Printf("Dictionary byte span ID: %d\n", chunkInfo.DictionaryID)
+			cli.Printf("Data byte span ID: %d\n", chunkInfo.DataID)
+
+			if chunkInfo.DictionaryByteSpan.Length > 0 {
+				cli.Printf("Dictionary byte span: offset=%d, length=%d\n",
+					chunkInfo.DictionaryByteSpan.Offset, chunkInfo.DictionaryByteSpan.Length)
+			} else {
+				cli.Println("Dictionary byte span: none (empty)")
+			}
+
+			cli.Printf("Data byte span: offset=%d, length=%d\n",
+				chunkInfo.DataByteSpan.Offset, chunkInfo.DataByteSpan.Length)
+		}
+	}
+
+	// Handle inspect-index if provided
+	if indexStr, ok := apr.GetValue("inspect-index"); ok {
+		cli.Println()
+		cli.Println("Index inspection:")
+
+		// Parse the index
+		indexVal, err := strconv.ParseUint(indexStr, 10, 32)
+		if err != nil {
+			cli.PrintErrln("Error: Invalid index format. Expected unsigned integer.")
+			return 1
+		}
+
+		idx := uint32(indexVal)
+		details := inspector.GetIndexReaderDetails(idx)
+
+		// Print all details
+		cli.Printf("Index: %d\n", details.RequestedIndex)
+		cli.Printf("Index reader type: %s\n", details.IndexReaderType)
+		cli.Printf("Chunk count: %d\n", details.ChunkCount)
+		cli.Printf("Byte span count: %d\n", details.ByteSpanCount)
+
+		// When Error is set only the header fields printed above were filled
+		// in, so stop before printing per-entry data.
+		if details.Error != "" {
+			cli.Printf("Error: %s\n", details.Error)
+			return 1
+		}
+
+		cli.Printf("Hash: %s\n", details.Hash)
+		cli.Printf("Prefix: 0x%x\n", details.Prefix)
+		cli.Printf("Suffix: 0x%x\n", details.Suffix)
+		cli.Printf("Dictionary ID: %d\n", details.DictionaryID)
+		cli.Printf("Data ID: %d\n", details.DataID)
+
+		// Show implementation-specific details
+		cli.Println()
+		cli.Println("Implementation details:")
+
+		// Show common calculation details first
+		if details.ExpectedSuffixStart != 0 || details.ExpectedSuffixEnd != 0 {
+			cli.Printf("Expected suffix start: %d\n", details.ExpectedSuffixStart)
+			cli.Printf("Expected suffix end: %d\n", details.ExpectedSuffixEnd)
+		}
+
+		// Show in-memory specific details
+		if details.PrefixArrayLength > 0 {
+			cli.Printf("Storage type: In-memory arrays\n")
+			cli.Printf("Prefix array length: %d\n", details.PrefixArrayLength)
+			cli.Printf("Suffix array length: %d\n", details.SuffixArrayLength)
+			cli.Printf("Chunk ref array length: %d\n", details.ChunkRefArrayLength)
+			cli.Printf("Span index array length: %d\n", details.SpanIndexArrayLength)
+			cli.Printf("Suffix array bounds valid: %t\n", details.SuffixArrayBounds)
+		}
+
+		// Show mmap specific details
+		if details.MmapIndexSize > 0 {
+			cli.Printf("Storage type: Memory-mapped file\n")
+			cli.Printf("Span index offset: %d\n", details.SpanIndexOffset)
+			cli.Printf("Prefixes offset: %d\n", details.PrefixesOffset)
+			cli.Printf("Chunk refs offset: %d\n", details.ChunkRefsOffset)
+			cli.Printf("Suffixes offset: %d\n", details.SuffixesOffset)
+			cli.Printf("Actual suffix file offset: %d\n", details.ActualSuffixOffset)
+		}
+
+		// Show raw suffix bytes for both implementations
+		if len(details.RawSuffixBytes) > 0 {
+			cli.Printf("Raw suffix bytes: %x\n", details.RawSuffixBytes)
+		}
+		if details.RawSuffixBytesError != "" {
+			cli.Printf("Raw suffix bytes error: %s\n", details.RawSuffixBytesError)
+		}
+	}
+
+	return 0
+}
@@ -0,0 +1,348 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package nbs
+
+import (
+	"context"
+	"encoding/binary"
+	"fmt"
+
+	"github.com/dolthub/dolt/go/store/hash"
+)
+
+// ByteSpanInfo provides information about a byte span in the archive. It is the
+// exported copy of the offset/length pair GetChunkInfo reads from the reader's
+// getByteSpanByID.
+type ByteSpanInfo struct {
+	// Offset is the span's offset as reported by the reader
+	// (presumably a byte offset into the archive file — TODO confirm).
+	Offset uint64
+	// Length is the span's length as reported by the reader.
+	Length uint64
+}
+
+// ChunkInfo contains information about a chunk within the archive, as assembled
+// by ArchiveInspector.GetChunkInfo.
+type ChunkInfo struct {
+	// CompressionType is a human-readable label derived from the dictionary
+	// ID and the archive's format version.
+	CompressionType string
+	// DictionaryID and DataID are the two byte span IDs from the chunk's
+	// index reference.
+	DictionaryID uint32
+	DataID       uint32
+	// DictionaryByteSpan and DataByteSpan are the spans those IDs resolve to.
+	DictionaryByteSpan ByteSpanInfo
+	DataByteSpan       ByteSpanInfo
+}
+
+// SearchDebugInfo contains detailed information about a chunk search operation,
+// as recorded by ArchiveInspector.SearchChunkDebug.
+type SearchDebugInfo struct {
+	// Hash, Prefix and Suffix describe the hash being searched for.
+	Hash   string
+	Prefix uint64
+	Suffix []byte
+	// PossibleMatch is the index returned by the index reader's prefix search.
+	PossibleMatch int32
+	// ChunkCount is the chunk count from the archive footer.
+	ChunkCount uint32
+	// IndexReaderType is the Go type name (%T) of the index reader in use.
+	IndexReaderType string
+	// ValidRange reports whether PossibleMatch lies in [0, ChunkCount).
+	ValidRange bool
+	// FinalResult is the index whose suffix matched, or -1 if none did.
+	FinalResult int
+	// Matches lists, in index order, the entries examined whose prefix equals
+	// Prefix, up to and including the first suffix match.
+	Matches []PrefixMatch
+}
+
+// PrefixMatch contains information about a chunk that matches the search prefix
+type PrefixMatch struct {
+	// Index is the position of the entry in the archive index.
+	Index uint32
+	// SuffixAtIdx is the suffix stored at Index.
+	SuffixAtIdx []byte
+	// SuffixMatch reports whether SuffixAtIdx equals the searched-for suffix.
+	SuffixMatch bool
+}
+
+// IndexReaderDetails contains detailed information about index reader state for
+// one index position, as collected by ArchiveInspector.GetIndexReaderDetails.
+//
+// IndexReaderType, RequestedIndex, ChunkCount and ByteSpanCount are always
+// set. When Error is non-empty the remaining fields are left at their zero
+// values.
+type IndexReaderDetails struct {
+	IndexReaderType string
+	RequestedIndex  uint32
+	ChunkCount      uint32
+	ByteSpanCount   uint32
+	Error           string
+	Hash            string
+	Prefix          uint64
+	Suffix          []byte
+	DictionaryID    uint32
+	DataID          uint32
+
+	// In-memory reader specific fields
+	// (ExpectedSuffixStart, ExpectedSuffixEnd and RawSuffixBytes are also
+	// filled in when the memory-mapped reader is in use.)
+	PrefixArrayLength    int
+	SuffixArrayLength    int
+	ChunkRefArrayLength  int
+	SpanIndexArrayLength int
+	ExpectedSuffixStart  uint32
+	ExpectedSuffixEnd    uint32
+	SuffixArrayBounds    bool
+	RawSuffixBytes       []byte
+
+	// Memory-mapped reader specific fields
+	MmapIndexSize       uint64
+	MmapByteSpanCount   uint32
+	MmapChunkCount      uint32
+	SpanIndexOffset     uint64
+	PrefixesOffset      uint64
+	ChunkRefsOffset     uint64
+	SuffixesOffset      uint64
+	ActualSuffixOffset  uint64
+	RawSuffixBytesError string
+}
+
+// ArchiveInspector provides a way to inspect archive files from outside the nbs package. Intended for debugging and inspection,
+// currently only used by the `dolt admin archive-inspect` command.
+//
+// It wraps a single archiveReader; call Close when finished with it.
+type ArchiveInspector struct {
+	reader archiveReader
+}
+
+// NewArchiveInspectorFromFileWithMmap opens the archive at archivePath and
+// wraps it in an ArchiveInspector. enableMmap is forwarded to the file reader
+// and selects memory-mapped access. If the archive reader cannot be built, the
+// file reader is closed again before the error is returned.
+func NewArchiveInspectorFromFileWithMmap(ctx context.Context, archivePath string, enableMmap bool) (*ArchiveInspector, error) {
+	fra, err := newFileReaderAt(archivePath, enableMmap)
+	if err != nil {
+		return nil, err
+	}
+
+	// The reader is handed a zero hash and a throwaway Stats: the file is
+	// already open, so neither is needed to locate it.
+	rdr, err := newArchiveReader(ctx, fra, hash.Hash{}, uint64(fra.sz), &Stats{})
+	if err != nil {
+		fra.Close()
+		return nil, err
+	}
+
+	inspector := &ArchiveInspector{reader: rdr}
+	return inspector, nil
+}
+
+// Close closes the underlying archive reader and returns any error it reports.
+func (ai *ArchiveInspector) Close() error {
+	return ai.reader.close()
+}
+
+// ChunkCount returns the number of chunks in the archive, as reported by the
+// underlying reader's count method.
+func (ai *ArchiveInspector) ChunkCount() uint32 {
+	return ai.reader.count()
+}
+
+// FormatVersion returns the format version recorded in the archive footer.
+func (ai *ArchiveInspector) FormatVersion() uint8 {
+	return ai.reader.footer.formatVersion
+}
+
+// FileSignature returns the file signature recorded in the archive footer.
+func (ai *ArchiveInspector) FileSignature() string {
+	return ai.reader.footer.fileSignature
+}
+
+// IndexSize returns the size of the index section in bytes, as recorded in the
+// archive footer.
+func (ai *ArchiveInspector) IndexSize() uint64 {
+	return ai.reader.footer.indexSize
+}
+
+// MetadataSize returns the size of the metadata section in bytes, as recorded
+// in the archive footer.
+func (ai *ArchiveInspector) MetadataSize() uint32 {
+	return ai.reader.footer.metadataSize
+}
+
+// FileSize returns the total size of the archive file, as held in the reader's
+// footer.
+func (ai *ArchiveInspector) FileSize() uint64 {
+	return ai.reader.footer.fileSize
+}
+
+// ByteSpanCount returns the number of byte spans in the archive, as recorded in
+// the archive footer.
+func (ai *ArchiveInspector) ByteSpanCount() uint32 {
+	return ai.reader.footer.byteSpanCount
+}
+
+// GetMetadata returns the archive's metadata section as raw bytes, read
+// through the underlying reader. A fresh Stats value is supplied for the call
+// and discarded afterwards.
+func (ai *ArchiveInspector) GetMetadata(ctx context.Context) ([]byte, error) {
+	return ai.reader.getMetadata(ctx, &Stats{})
+}
+
+// SearchChunk exposes the underlying search method for debugging. It returns
+// whatever the reader's search returns for h; GetChunkInfo treats a negative
+// result as "not found".
+func (ai *ArchiveInspector) SearchChunk(h hash.Hash) int {
+	return ai.reader.search(h)
+}
+
+// SearchChunkDebug performs a lookup of h against the index reader and records
+// each intermediate value for debugging.
+//
+// It asks the index reader for the first candidate position for h's prefix. If
+// that position is outside [0, chunkCount) the result has ValidRange false,
+// FinalResult -1 and no Matches. Otherwise it walks forward over consecutive
+// entries that share the prefix, recording each one, and stops at the first
+// entry whose suffix also matches; FinalResult is that entry's index, or -1
+// if the walk ends without a suffix match.
+func (ai *ArchiveInspector) SearchChunkDebug(h hash.Hash) *SearchDebugInfo {
+	idxReader := ai.reader.indexReader
+	total := ai.reader.footer.chunkCount
+
+	wantPrefix := h.Prefix()
+	candidate := idxReader.searchPrefix(wantPrefix)
+	wantSuffix := h.Suffix()
+
+	info := &SearchDebugInfo{
+		Hash:            h.String(),
+		Prefix:          wantPrefix,
+		Suffix:          wantSuffix,
+		PossibleMatch:   candidate,
+		ChunkCount:      total,
+		IndexReaderType: fmt.Sprintf("%T", idxReader),
+		// Assume a miss until a suffix match is found below.
+		FinalResult: -1,
+	}
+
+	// A candidate outside the index means there is nothing to walk.
+	if candidate < 0 || uint32(candidate) >= total {
+		return info
+	}
+	info.ValidRange = true
+
+	// Non-nil even when no entry shares the prefix.
+	info.Matches = []PrefixMatch{}
+	for pos := uint32(candidate); pos < total && idxReader.getPrefix(pos) == wantPrefix; pos++ {
+		stored := idxReader.getSuffix(pos)
+		hit := stored == suffix(wantSuffix)
+		info.Matches = append(info.Matches, PrefixMatch{
+			Index:       pos,
+			SuffixAtIdx: stored[:],
+			SuffixMatch: hit,
+		})
+		if hit {
+			info.FinalResult = int(pos)
+			break
+		}
+	}
+
+	return info
+}
+
+// GetIndexReaderDetails exposes internal index reader state for the entry at
+// idx, for debugging.
+//
+// The returned value always carries the reader type, the requested index and
+// the footer's chunk and byte span counts. If idx is not below the chunk count,
+// Error is set to "index out of range" and nothing else is filled in.
+// Otherwise the entry's prefix, suffix, reconstructed hash and chunk reference
+// are recorded, followed by layout details specific to whichever index reader
+// implementation (in-memory or memory-mapped) is in use.
+func (ai *ArchiveInspector) GetIndexReaderDetails(idx uint32) *IndexReaderDetails {
+	details := &IndexReaderDetails{
+		IndexReaderType: fmt.Sprintf("%T", ai.reader.indexReader),
+		RequestedIndex:  idx,
+		ChunkCount:      ai.reader.footer.chunkCount,
+		ByteSpanCount:   ai.reader.footer.byteSpanCount,
+	}
+
+	if idx >= ai.reader.footer.chunkCount {
+		details.Error = "index out of range"
+		return details
+	}
+
+	// Get prefix and suffix
+	prefix := ai.reader.indexReader.getPrefix(idx)
+	suffix := ai.reader.indexReader.getSuffix(idx)
+
+	details.Prefix = prefix
+	details.Suffix = suffix[:]
+
+	// Construct the full hash from prefix and suffix
+	hashBytes := make([]byte, hash.ByteLen)
+	binary.BigEndian.PutUint64(hashBytes[:hash.PrefixLen], prefix)
+	copy(hashBytes[hash.PrefixLen:], suffix[:])
+	reconstructedHash := hash.New(hashBytes)
+	details.Hash = reconstructedHash.String()
+
+	// Get chunk references
+	dictID, dataID := ai.reader.indexReader.getChunkRef(idx)
+	details.DictionaryID = dictID
+	details.DataID = dataID
+
+	// Position of this entry's suffix within the suffix section. This is
+	// computed in 64 bits: idx*SuffixLen can exceed the uint32 range, and a
+	// wrapped value would point the bounds check and raw-byte window at the
+	// wrong entry. The uint32 detail fields still hold the truncated value.
+	expectedSuffixStart := uint64(idx) * hash.SuffixLen
+	expectedSuffixEnd := expectedSuffixStart + hash.SuffixLen
+
+	// For in-memory reader, expose the raw array details
+	if inMem, ok := ai.reader.indexReader.(*inMemoryArchiveIndexReader); ok {
+		details.PrefixArrayLength = len(inMem.prefixes)
+		details.SuffixArrayLength = len(inMem.suffixes)
+		details.ChunkRefArrayLength = len(inMem.chunkRefs)
+		details.SpanIndexArrayLength = len(inMem.spanIndex)
+
+		// Calculate expected suffix position
+		suffixesLen := uint64(len(inMem.suffixes))
+		details.ExpectedSuffixStart = uint32(expectedSuffixStart)
+		details.ExpectedSuffixEnd = uint32(expectedSuffixEnd)
+		details.SuffixArrayBounds = expectedSuffixEnd <= suffixesLen
+
+		// Show raw bytes around the suffix position for debugging, clamped to
+		// the end of the suffix array.
+		if expectedSuffixStart < suffixesLen {
+			end := expectedSuffixEnd
+			if end > suffixesLen {
+				end = suffixesLen
+			}
+			details.RawSuffixBytes = inMem.suffixes[expectedSuffixStart:end]
+		}
+	}
+
+	// For mmap reader, expose similar details
+	if mmapReader, isMmap := ai.reader.indexReader.(*mmapIndexReader); isMmap {
+		details.MmapIndexSize = mmapReader.indexSize
+		details.MmapByteSpanCount = mmapReader.byteSpanCount
+		details.MmapChunkCount = mmapReader.chunkCount
+		details.SpanIndexOffset = mmapReader.spanIndexOffset
+		details.PrefixesOffset = mmapReader.prefixesOffset
+		details.ChunkRefsOffset = mmapReader.chunkRefsOffset
+		details.SuffixesOffset = mmapReader.suffixesOffset
+
+		// Calculate expected suffix position in mmap
+		actualSuffixOffset := mmapReader.suffixesOffset + expectedSuffixStart
+		details.ExpectedSuffixStart = uint32(expectedSuffixStart)
+		details.ExpectedSuffixEnd = uint32(expectedSuffixEnd)
+		details.ActualSuffixOffset = actualSuffixOffset
+
+		// Try to read raw bytes around the suffix position
+		if mmapReader.data != nil {
+			rawBytes := make([]byte, hash.SuffixLen)
+			_, err := mmapReader.data.ReadAt(rawBytes, int64(actualSuffixOffset))
+			if err == nil {
+				details.RawSuffixBytes = rawBytes
+			} else {
+				details.RawSuffixBytesError = err.Error()
+			}
+		}
+	}
+
+	return details
+}
+
+// GetChunkInfo searches the archive for h and describes where the chunk is
+// stored. It returns a "chunk ... not found" error when the search yields a
+// negative index. Otherwise it returns the chunk's dictionary and data byte
+// span IDs, the spans they resolve to, and a compression label derived from
+// the dictionary ID and the footer's format version.
+func (ai *ArchiveInspector) GetChunkInfo(ctx context.Context, h hash.Hash) (*ChunkInfo, error) {
+	idx := ai.reader.search(h)
+	if idx < 0 {
+		return nil, fmt.Errorf("chunk %s not found", h.String())
+	}
+
+	// Resolve the chunk reference, then the two byte spans it names.
+	dictID, dataID := ai.reader.getChunkRef(idx)
+	dictSpan := ai.reader.getByteSpanByID(dictID)
+	dataSpan := ai.reader.getByteSpanByID(dataID)
+
+	version := ai.reader.footer.formatVersion
+	label := "unknown"
+	switch {
+	case dictID != 0:
+		// A non-zero dictionary ID means zstd with dictionary.
+		label = "zstd (with dictionary)"
+	case version == 1:
+		// No dictionary in a version 1 archive.
+		label = "zstd (no dictionary)"
+	case version >= 2:
+		// No dictionary in a version 2 or later archive.
+		label = "snappy"
+	}
+
+	info := &ChunkInfo{
+		CompressionType:    label,
+		DictionaryID:       dictID,
+		DataID:             dataID,
+		DictionaryByteSpan: ByteSpanInfo{Offset: dictSpan.offset, Length: dictSpan.length},
+		DataByteSpan:       ByteSpanInfo{Offset: dataSpan.offset, Length: dataSpan.length},
+	}
+	return info, nil
+}
@@ -323,7 +323,7 @@ func (f *inMemoryArchiveIndexReader) getChunkRef(idx uint32) (dict, data uint32)
 }

 func (f *inMemoryArchiveIndexReader) getSuffix(idx uint32) suffix {
-	if idx >= uint32(len(f.suffixes)/hash.SuffixLen) {
+	if idx >= f.getNumChunks() {
 		return suffix{}
 	}
 	start := uint64(idx) * hash.SuffixLen
@@ -0,0 +1,142 @@
+#! /usr/bin/env bats
+load $BATS_TEST_DIRNAME/helper/common.bash
+
+ARCHIVE_PATH="$BATS_TEST_DIRNAME/archive-test-repos/v2/noms/oldgen/27avtn2a3upddh52eu750m4709gfps7s.darc"
+
+# Per-test setup. Uses setup_no_dolt_init (presumably from helper/common.bash),
+# since the command under test does not require a repository.
+setup() {
+    setup_no_dolt_init
+}
+
+# Per-test cleanup; delegates to the shared teardown_common helper.
+teardown() {
+    teardown_common
+}
+
+@test "admin-archive-inspect: basic archive inspection" {
+    run dolt admin archive-inspect "$ARCHIVE_PATH"
+    [ "$status" -eq 0 ]
+    [[ "$output" =~ "Archive file:" ]] || false
+    [[ "$output" =~ "File size:" ]] || false
+    [[ "$output" =~ "Format version:" ]] || false
+    [[ "$output" =~ "File signature:" ]] || false
+    [[ "$output" =~ "Chunk count:" ]] || false
+    [[ "$output" =~ "Byte span count:" ]] || false
+    [[ "$output" =~ "Index size:" ]] || false
+    [[ "$output" =~ "Metadata size:" ]] || false
+}
+
+@test "admin-archive-inspect: archive file must exist" {
+    run dolt admin archive-inspect "/nonexistent/file.darc"
+    [ "$status" -eq 1 ]
+    [[ "$output" =~ "Error: Archive file does not exist:" ]] || false
+}
+
+@test "admin-archive-inspect: mmap flag works" {
+    run dolt admin archive-inspect --mmap "$ARCHIVE_PATH"
+    [ "$status" -eq 0 ]
+    [[ "$output" =~ "Archive file:" ]] || false
+    [[ "$output" =~ "Chunk count:" ]] || false
+}
+
+@test "admin-archive-inspect: object-id inspection with invalid hash" {
+    run dolt admin archive-inspect --object-id "invalid" "$ARCHIVE_PATH"
+    [ "$status" -eq 1 ]
+    [[ "$output" =~ "Error: Invalid object ID format. Expected 32-character base32 encoded hash." ]] || false
+}
+
+@test "admin-archive-inspect: object-id inspection with valid hash format but not found" {
+    run dolt admin archive-inspect --object-id "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "$ARCHIVE_PATH"
+    [ "$status" -eq 1 ]
+    [[ "$output" =~ "Object inspection:" ]] || false
+    [[ "$output" =~ "Hash:" ]] || false
+    [[ "$output" =~ "Prefix:" ]] || false
+    [[ "$output" =~ "Suffix:" ]] || false
+    [[ "$output" =~ "Error inspecting object:" ]] || false
+    [[ "$output" =~ "not found" ]] || false
+}
+
+@test "admin-archive-inspect: object-id inspection with existing hash" {
+    # Use the hash we know exists at index 42
+    run dolt admin archive-inspect --object-id "4pguchpitq1bsb09ogaivmcstgsnbd3k" "$ARCHIVE_PATH"
+    [ "$status" -eq 0 ]
+    [[ "$output" =~ "Object inspection:" ]] || false
+    [[ "$output" =~ "Hash: 4pguchpitq1bsb09ogaivmcstgsnbd3k" ]] || false
+    [[ "$output" =~ "Prefix: 0x2661e64732ee82be" ]] || false
+    [[ "$output" =~ "Suffix: 0x2c09c4152fd99cec3975b474" ]] || false
+    [[ "$output" =~ "Possible match index: 42" ]] || false
+    [[ "$output" =~ "Compression type: zstd (with dictionary" ]] || false
+    [[ "$output" =~ "Dictionary byte span ID: 1" ]] || false
+    [[ "$output" =~ "Data byte span ID: 70" ]] || false
+    [[ "$output" =~ "Dictionary byte span: offset=0, length=296" ]] || false
+    [[ "$output" =~ "Data byte span: offset=20850, length=43" ]] || false
+}
+
+@test "admin-archive-inspect: object-id inspection with existing hash with mmap" {
+  # Use the hash we know exists at index 42
+  run dolt admin archive-inspect --mmap --object-id "4pguchpitq1bsb09ogaivmcstgsnbd3k" "$ARCHIVE_PATH"
+  [ "$status" -eq 0 ]
+  [[ "$output" =~ "Object inspection:" ]] || false
+  [[ "$output" =~ "Hash: 4pguchpitq1bsb09ogaivmcstgsnbd3k" ]] || false
+  [[ "$output" =~ "Prefix: 0x2661e64732ee82be" ]] || false
+  [[ "$output" =~ "Suffix: 0x2c09c4152fd99cec3975b474" ]] || false
+  [[ "$output" =~ "Possible match index: 42" ]] || false
+  [[ "$output" =~ "Compression type: zstd (with dictionary" ]] || false
+  [[ "$output" =~ "Dictionary byte span ID: 1" ]] || false
+  [[ "$output" =~ "Data byte span ID: 70" ]] || false
+  [[ "$output" =~ "Dictionary byte span: offset=0, length=296" ]] || false
+  [[ "$output" =~ "Data byte span: offset=20850, length=43" ]] || false
+}
+
+@test "admin-archive-inspect: inspect-index with invalid index" {
+    run dolt admin archive-inspect --inspect-index "invalid" "$ARCHIVE_PATH"
+    [ "$status" -eq 1 ]
+    [[ "$output" =~ "Error: Invalid index format. Expected unsigned integer." ]] || false
+}
+
+@test "admin-archive-inspect: inspect-index with valid index" {
+    run dolt admin archive-inspect --inspect-index "0" "$ARCHIVE_PATH"
+    [ "$status" -eq 0 ]
+    [[ "$output" =~ "Index inspection:" ]] || false
+    [[ "$output" =~ "Index: 0" ]] || false
+    [[ "$output" =~ "Index reader type: *nbs.inMemoryArchiveIndexReader" ]] || false
+    [[ "$output" =~ "Chunk count: 230" ]] || false
+    [[ "$output" =~ "Byte span count: 231" ]] || false
+    [[ "$output" =~ "Hash: 03fe1b95i4bqpetk2klb46devv1saqmd" ]] || false
+    [[ "$output" =~ "Prefix: 0xdee0ad259117ac" ]] || false
+    [[ "$output" =~ "Suffix: 0xbbb4152ab219aeffc3c56acd" ]] || false
+}
+
+@test "admin-archive-inspect: inspect-index with valid index and mmap" {
+  run dolt admin archive-inspect --mmap --inspect-index "0" "$ARCHIVE_PATH"
+  [ "$status" -eq 0 ]
+  [[ "$output" =~ "Index inspection:" ]] || false
+  [[ "$output" =~ "Index: 0" ]] || false
+  [[ "$output" =~ "Index reader type: *nbs.mmapIndexReader" ]] || false
+  [[ "$output" =~ "Chunk count: 230" ]] || false
+  [[ "$output" =~ "Byte span count: 231" ]] || false
+  [[ "$output" =~ "Hash: 03fe1b95i4bqpetk2klb46devv1saqmd" ]] || false
+  [[ "$output" =~ "Prefix: 0xdee0ad259117ac" ]] || false
+  [[ "$output" =~ "Suffix: 0xbbb4152ab219aeffc3c56acd" ]] || false
+}
+
+@test "admin-archive-inspect: inspect-index with out of range index" {
+    # 230 chunks, so index 231 is out of range
+    run dolt admin archive-inspect --inspect-index "231" "$ARCHIVE_PATH"
+    [ "$status" -eq 1 ]
+    [[ "$output" =~ "Error: index out of range" ]] || false
+}
+
+@test "admin-archive-inspect: mmap and non-mmap produce similar output format" {
+    run dolt admin archive-inspect "$ARCHIVE_PATH"
+    [ "$status" -eq 0 ]
+    output_nommap="$output"
+    
+    run dolt admin archive-inspect --mmap "$ARCHIVE_PATH"
+    [ "$status" -eq 0 ]
+    output_mmap="$output"
+    
+    # Both should have the same basic structure
+    [[ "$output_nommap" =~ "Archive file:" ]] || false
+    [[ "$output_mmap" =~ "Archive file:" ]] || false
+    [[ "$output_nommap" =~ "Chunk count:" ]] || false
+    [[ "$output_mmap" =~ "Chunk count:" ]] || false
+}
@@ -142,6 +142,7 @@ SKIP_SERVER_TESTS=$(cat <<-EOM
 ~import-no-header-csv.bats~
 ~import-no-header-psv.bats~
 ~admin-conjoin.bats~
+~admin-archive-inspect.bats~
 EOM
 )