From f6d99307c1e43a8a59da44cdd6add42d170e8b03 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Mon, 15 Sep 2025 18:08:05 +0000 Subject: [PATCH] Add the --object-id flag to inspect an object --- go/cmd/dolt/commands/admin/archive_inspect.go | 41 +++++++++++++ go/store/nbs/archive_inspect.go | 61 +++++++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/go/cmd/dolt/commands/admin/archive_inspect.go b/go/cmd/dolt/commands/admin/archive_inspect.go index b6a5218681..ad1be585a4 100644 --- a/go/cmd/dolt/commands/admin/archive_inspect.go +++ b/go/cmd/dolt/commands/admin/archive_inspect.go @@ -24,6 +24,7 @@ import ( "github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/libraries/doltcore/env" "github.com/dolthub/dolt/go/libraries/utils/argparser" + "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/nbs" ) @@ -50,6 +51,7 @@ func (cmd ArchiveInspectCmd) ArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 1) ap.SupportsString("archive-path", "", "archive_path", "Full path to the archive file (.darc) to inspect") ap.SupportsFlag("mmap", "", "Enable memory-mapped index reading for better performance") + ap.SupportsString("object-id", "", "object_id", "Base32-encoded 20-byte object ID to inspect within the archive") return ap } @@ -134,5 +136,44 @@ func (cmd ArchiveInspectCmd) Exec(ctx context.Context, commandStr string, args [ } } + // Handle object-id inspection if provided + if objectIdStr, ok := apr.GetValue("object-id"); ok { + cli.Println() + cli.Println("Object inspection:") + + // Parse the hash + objectHash, hashOk := hash.MaybeParse(objectIdStr) + if !hashOk { + cli.PrintErrln("Error: Invalid object ID format. Expected 32-character base32 encoded hash.") + return 1 + } + + // Look up the object in the archive + chunkInfo, err := inspector.GetChunkInfo(ctx, objectHash) + if err != nil { + cli.PrintErrln("Error inspecting object:", err.Error()) + return 1 + } + + if chunkInfo == nil { + cli.Printf("Object %s not found in archive\n", objectIdStr) + } else { + cli.Printf("Object ID: %s\n", objectIdStr) + cli.Printf("Compression type: %s\n", chunkInfo.CompressionType) + cli.Printf("Dictionary byte span ID: %d\n", chunkInfo.DictionaryID) + cli.Printf("Data byte span ID: %d\n", chunkInfo.DataID) + + if chunkInfo.DictionaryByteSpan.Length > 0 { + cli.Printf("Dictionary byte span: offset=%d, length=%d\n", + chunkInfo.DictionaryByteSpan.Offset, chunkInfo.DictionaryByteSpan.Length) + } else { + cli.Println("Dictionary byte span: none (empty)") + } + + cli.Printf("Data byte span: offset=%d, length=%d\n", + chunkInfo.DataByteSpan.Offset, chunkInfo.DataByteSpan.Length) + } + } + return 0 } \ No newline at end of file diff --git a/go/store/nbs/archive_inspect.go b/go/store/nbs/archive_inspect.go index 580abc3c4f..ffbbd7856e 100644 --- a/go/store/nbs/archive_inspect.go +++ b/go/store/nbs/archive_inspect.go @@ -20,6 +20,21 @@ import ( "github.com/dolthub/dolt/go/store/hash" ) +// ByteSpanInfo provides information about a byte span in the archive +type ByteSpanInfo struct { + Offset uint64 + Length uint64 +} + +// ChunkInfo contains information about a chunk within the archive +type ChunkInfo struct { + CompressionType string + DictionaryID uint32 + DataID uint32 + DictionaryByteSpan ByteSpanInfo + DataByteSpan ByteSpanInfo +} + // ArchiveInspector provides a way to inspect archive files from outside the nbs package type ArchiveInspector struct { reader archiveReader @@ -94,4 +109,50 @@ func (ai *ArchiveInspector) ByteSpanCount() uint32 { func (ai *ArchiveInspector) GetMetadata(ctx context.Context) ([]byte, error) { stats := &Stats{} return ai.reader.getMetadata(ctx, stats) +} + +// GetChunkInfo looks up information about a specific chunk in the archive +func (ai *ArchiveInspector) GetChunkInfo(ctx context.Context, h hash.Hash) (*ChunkInfo, error) { + // Search for the chunk + idx := ai.reader.search(h) + if idx < 0 { + return nil, nil // Chunk not found + } + + // Get the chunk reference (dictionary ID and data ID) + dictID, dataID := ai.reader.getChunkRef(idx) + + // Get the byte span information + dictByteSpan := ai.reader.getByteSpanByID(dictID) + dataByteSpan := ai.reader.getByteSpanByID(dataID) + + // Determine compression type based on dictionary ID and archive version + compressionType := "unknown" + formatVersion := ai.reader.footer.formatVersion + + if dictID == 0 { + // Dictionary ID 0 means no dictionary + if formatVersion == 1 { + compressionType = "zstd (no dictionary)" + } else if formatVersion >= 2 { + compressionType = "snappy" + } + } else { + // Dictionary ID > 0 means zstd with dictionary + compressionType = "zstd (with dictionary)" + } + + return &ChunkInfo{ + CompressionType: compressionType, + DictionaryID: dictID, + DataID: dataID, + DictionaryByteSpan: ByteSpanInfo{ + Offset: dictByteSpan.offset, + Length: dictByteSpan.length, + }, + DataByteSpan: ByteSpanInfo{ + Offset: dataByteSpan.offset, + Length: dataByteSpan.length, + }, + }, nil } \ No newline at end of file