mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-03 19:41:24 -05:00
Merge pull request #5258 from dolthub/zachmu/gc-panic
[no-release-notes] noms command to find dangling references
This commit is contained in:
@@ -47,6 +47,7 @@ var commands = []*util.Command{
|
||||
nomsVersion,
|
||||
nomsManifest,
|
||||
nomsCat,
|
||||
nomsWalk,
|
||||
}
|
||||
|
||||
var kingpinCommands = []util.KingpinCommand{
|
||||
@@ -212,6 +213,13 @@ See Spelling Objects at https://github.com/attic-labs/noms/blob/master/doc/spell
|
||||
show.Flag("tz", "display formatted date comments in specified timezone, must be: local or utc").Enum("local", "utc")
|
||||
show.Arg("object", "a noms object").Required().String()
|
||||
|
||||
// walk
|
||||
walk := noms.Command("walk", `Walks references contained in an object.
|
||||
See Spelling Objects at https://github.com/attic-labs/noms/blob/master/doc/spelling.md for details on the object argument.
|
||||
`)
|
||||
walk.Arg("object", "a noms object").String()
|
||||
walk.Flag("quiet", "If true, prints only dangling refs, not the paths of all refs").Bool()
|
||||
|
||||
// version
|
||||
noms.Command("version", "Print the noms version")
|
||||
|
||||
|
||||
@@ -126,6 +126,11 @@ func runShow(ctx context.Context, args []string) int {
|
||||
}
|
||||
|
||||
func outputType(value types.Value) {
|
||||
typeString := typeString(value)
|
||||
fmt.Fprint(os.Stdout, typeString, " - ")
|
||||
}
|
||||
|
||||
func typeString(value types.Value) string {
|
||||
var typeString string
|
||||
switch value := value.(type) {
|
||||
case types.SerialMessage:
|
||||
@@ -156,7 +161,7 @@ func outputType(value types.Value) {
|
||||
util.CheckErrorNoUsage(err)
|
||||
typeString = t.HumanReadableString()
|
||||
}
|
||||
fmt.Fprint(os.Stdout, typeString, " - ")
|
||||
return typeString
|
||||
}
|
||||
|
||||
func outputEncodedValue(ctx context.Context, w io.Writer, value types.Value) error {
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
flag "github.com/juju/gnuflag"
|
||||
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/cmd/noms/util"
|
||||
"github.com/dolthub/dolt/go/store/config"
|
||||
"github.com/dolthub/dolt/go/store/d"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/nbs"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
"github.com/dolthub/dolt/go/store/util/outputpager"
|
||||
"github.com/dolthub/dolt/go/store/util/verbose"
|
||||
)
|
||||
|
||||
var nomsWalk = &util.Command{
|
||||
Run: runWalk,
|
||||
UsageLine: "walk [flags] [<object>]",
|
||||
Short: "Prints a depth-first listing of all paths to leaf data, beginning with the reference provided. If no ref is provided, uses the manifest root.",
|
||||
Long: "See Spelling Objects at https://github.com/attic-labs/noms/blob/master/doc/spelling.md for details on the object argument.",
|
||||
Flags: setupWalkFlags,
|
||||
Nargs: 0,
|
||||
}
|
||||
|
||||
// quiet is bound to the walk command's -quiet flag by setupWalkFlags. When
// true, walkAddrs does not print the path of every visited ref; dangling
// references are still reported. The zero value (false) is the default.
var quiet bool
|
||||
|
||||
func setupWalkFlags() *flag.FlagSet {
|
||||
walkPathSet := flag.NewFlagSet("walk", flag.ExitOnError)
|
||||
outputpager.RegisterOutputpagerFlags(walkPathSet)
|
||||
verbose.RegisterVerboseFlags(walkPathSet)
|
||||
walkPathSet.BoolVar(&quiet, "quiet", false, "If true do not print all ref paths, only dangling refs")
|
||||
return walkPathSet
|
||||
}
|
||||
|
||||
func runWalk(ctx context.Context, args []string) int {
|
||||
cfg := config.NewResolver()
|
||||
|
||||
var value types.Value
|
||||
|
||||
var startHash string
|
||||
if len(args) < 1 {
|
||||
manifestReader, err := os.Open("./.dolt/noms/manifest")
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, "Error reading manifest: ", err)
|
||||
return 1
|
||||
}
|
||||
|
||||
manifest, err := nbs.ParseManifest(manifestReader)
|
||||
d.PanicIfError(err)
|
||||
|
||||
startHash = manifest.GetRoot().String()
|
||||
} else {
|
||||
startHash = args[0]
|
||||
}
|
||||
|
||||
fullPath := startHash
|
||||
|
||||
if strings.HasPrefix(fullPath, "#") && !strings.HasPrefix(fullPath, ".dolt/noms::#") {
|
||||
fullPath = ".dolt/noms::" + fullPath
|
||||
} else if !strings.HasPrefix(fullPath, ".dolt/noms::#") {
|
||||
fullPath = ".dolt/noms::#" + fullPath
|
||||
}
|
||||
|
||||
database, vrw, value, err := cfg.GetPath(ctx, fullPath)
|
||||
|
||||
if err != nil {
|
||||
util.CheckErrorNoUsage(err)
|
||||
} else {
|
||||
}
|
||||
|
||||
defer database.Close()
|
||||
|
||||
if value == nil {
|
||||
fmt.Fprintf(os.Stderr, "Object not found: %s\n", fullPath)
|
||||
return 0
|
||||
}
|
||||
|
||||
if showPages {
|
||||
pgr := outputpager.Start()
|
||||
defer pgr.Stop()
|
||||
|
||||
err := walkAddrs(ctx, pgr.Writer, startHash, value, vrw)
|
||||
if err != nil {
|
||||
fmt.Fprintf(pgr.Writer, "error encountered: %s", err.Error())
|
||||
}
|
||||
fmt.Fprintln(pgr.Writer)
|
||||
} else {
|
||||
err := walkAddrs(ctx, os.Stdout, startHash, value, vrw)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stdout, "error encountered: %s", err.Error())
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(os.Stdout)
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
var seenMessages = hash.NewHashSet()
|
||||
var numProcessed = 0
|
||||
|
||||
func walkAddrs(ctx context.Context, w io.Writer, path string, value types.Value, cfg types.ValueReadWriter) error {
|
||||
walk := func(addr hash.Hash) error {
|
||||
value, err := cfg.ReadValue(ctx, addr)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if value == nil {
|
||||
fmt.Fprintf(w, "Dangling reference: hash %s not found for path %s\n", addr.String(), path)
|
||||
return nil
|
||||
}
|
||||
|
||||
numProcessed++
|
||||
|
||||
newPath := fmt.Sprintf("%s > %s(%s)", path, addr.String(), serialType(value))
|
||||
if !quiet {
|
||||
fmt.Fprintf(w, "%s\n", newPath)
|
||||
}
|
||||
|
||||
if numProcessed%100_000 == 0 {
|
||||
fmt.Fprintf(os.Stderr, "%d refs walked\n", numProcessed)
|
||||
}
|
||||
|
||||
// We only want to recurse on messages we haven't seen before. This means not outputting some possible paths to
|
||||
// some chunks, but since there are so very many paths to a typical chunk this is a huge time saver.
|
||||
if !seenMessages.Has(addr) {
|
||||
seenMessages.Insert(addr)
|
||||
return walkAddrs(ctx, w, newPath, value, cfg)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
switch msg := value.(type) {
|
||||
case types.SerialMessage:
|
||||
return msg.WalkAddrs(types.Format_Default, walk)
|
||||
default:
|
||||
// non-serial values can't be walked
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func serialType(value types.Value) string {
|
||||
sm, ok := value.(types.SerialMessage)
|
||||
if !ok {
|
||||
return typeString(value)
|
||||
}
|
||||
|
||||
return serial.GetFileID(sm)
|
||||
}
|
||||
@@ -175,7 +175,7 @@ func (sm SerialMessage) Less(nbf *NomsBinFormat, other LesserValuable) (bool, er
|
||||
const SerialMessageRefHeight = 1024
|
||||
|
||||
func (sm SerialMessage) walkRefs(nbf *NomsBinFormat, cb RefCallback) error {
|
||||
return sm.walkAddrs(nbf, func(addr hash.Hash) error {
|
||||
return sm.WalkAddrs(nbf, func(addr hash.Hash) error {
|
||||
r, err := constructRef(nbf, addr, PrimitiveTypeMap[ValueKind], SerialMessageRefHeight)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -183,7 +183,7 @@ func (sm SerialMessage) walkRefs(nbf *NomsBinFormat, cb RefCallback) error {
|
||||
return cb(r)
|
||||
})
|
||||
}
|
||||
func (sm SerialMessage) walkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) error) error {
|
||||
func (sm SerialMessage) WalkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) error) error {
|
||||
switch serial.GetFileID(sm) {
|
||||
case serial.StoreRootFileID:
|
||||
var msg serial.StoreRoot
|
||||
@@ -193,7 +193,7 @@ func (sm SerialMessage) walkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) er
|
||||
}
|
||||
if msg.AddressMapLength() > 0 {
|
||||
mapbytes := msg.AddressMapBytes()
|
||||
return SerialMessage(mapbytes).walkAddrs(nbf, cb)
|
||||
return SerialMessage(mapbytes).WalkAddrs(nbf, cb)
|
||||
}
|
||||
case serial.TagFileID:
|
||||
var msg serial.Tag
|
||||
@@ -231,7 +231,7 @@ func (sm SerialMessage) walkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) er
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = SerialMessage(msg.TablesBytes()).walkAddrs(nbf, cb)
|
||||
err = SerialMessage(msg.TablesBytes()).WalkAddrs(nbf, cb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -295,7 +295,7 @@ func (sm SerialMessage) walkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) er
|
||||
}
|
||||
}
|
||||
|
||||
err = SerialMessage(msg.SecondaryIndexesBytes()).walkAddrs(nbf, cb)
|
||||
err = SerialMessage(msg.SecondaryIndexesBytes()).WalkAddrs(nbf, cb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -312,7 +312,7 @@ func (sm SerialMessage) walkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) er
|
||||
return cb(ref.TargetHash())
|
||||
})
|
||||
} else {
|
||||
return SerialMessage(mapbytes).walkAddrs(nbf, cb)
|
||||
return SerialMessage(mapbytes).WalkAddrs(nbf, cb)
|
||||
}
|
||||
case serial.CommitFileID:
|
||||
parents, err := SerialCommitParentAddrs(nbf, sm)
|
||||
@@ -340,19 +340,10 @@ func (sm SerialMessage) walkAddrs(nbf *NomsBinFormat, cb func(addr hash.Hash) er
|
||||
return err
|
||||
}
|
||||
}
|
||||
case serial.TableSchemaFileID:
|
||||
case serial.TableSchemaFileID, serial.ForeignKeyCollectionFileID:
|
||||
// no further references from these file types
|
||||
return nil
|
||||
case serial.ForeignKeyCollectionFileID:
|
||||
return nil
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
fallthrough
|
||||
case serial.AddressMapFileID:
|
||||
fallthrough
|
||||
case serial.MergeArtifactsFileID:
|
||||
fallthrough
|
||||
case serial.BlobFileID:
|
||||
fallthrough
|
||||
case serial.CommitClosureFileID:
|
||||
case serial.ProllyTreeNodeFileID, serial.AddressMapFileID, serial.MergeArtifactsFileID, serial.BlobFileID, serial.CommitClosureFileID:
|
||||
return message.WalkAddresses(context.TODO(), serial.Message(sm), func(ctx context.Context, addr hash.Hash) error {
|
||||
return cb(addr)
|
||||
})
|
||||
|
||||
@@ -92,7 +92,7 @@ type ValueStore struct {
|
||||
func AddrsFromNomsValue(ctx context.Context, c chunks.Chunk, nbf *NomsBinFormat) (addrs hash.HashSet, err error) {
|
||||
addrs = hash.NewHashSet()
|
||||
if NomsKind(c.Data()[0]) == SerialMessageKind {
|
||||
err = SerialMessage(c.Data()).walkAddrs(nbf, func(a hash.Hash) error {
|
||||
err = SerialMessage(c.Data()).WalkAddrs(nbf, func(a hash.Hash) error {
|
||||
addrs.Insert(a)
|
||||
return nil
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user