Test for first malicious DB

This commit is contained in:
Neil Macneale IV
2024-09-29 16:02:45 -07:00
parent 0612097667
commit 33d0e45e59
24 changed files with 207 additions and 0 deletions

View File

@@ -0,0 +1,67 @@
// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package commands
import (
"context"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/utils/argparser"
)
// FsckCmd is the stateless command type behind `dolt fsck`. Its Exec method
// runs DoltDB.FSCK against the current environment's database.
type FsckCmd struct{}
// Compile-time assertion that FsckCmd satisfies the cli.Command interface.
var _ cli.Command = FsckCmd{}
// Description returns a short, human-readable summary of what fsck does.
//
// It must not panic: this is part of the cli.Command interface, so any generic
// code that walks the registered commands and asks for their descriptions
// would otherwise crash on fsck.
func (cmd FsckCmd) Description() string {
	return "Verifies the contents of the database are not corrupted."
}
// Exec runs the fsck command: it asks the environment's DoltDB to verify its
// chunks and prints each progress item FSCK reports.
//
// It returns 0 when FSCK succeeds. On failure the error is printed and 1 is
// returned. commandStr and args are currently unused.
func (cmd FsckCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv, _ cli.CliContext) int {
	progress := make(chan interface{}, 32)
	printed := fsckHandleProgress(progress)

	err := dEnv.DoltDB.FSCK(ctx, progress)

	// FSCK sends on progress only from its own call frame, so once it has
	// returned it is safe to close the channel. Wait for the printer goroutine
	// to drain the buffer before reporting the result; otherwise the process
	// can exit with progress lines still unprinted, or print them after the
	// error message.
	close(progress)
	<-printed

	if err != nil {
		cli.PrintErrln(err.Error())
		return 1
	}
	return 0
}

// fsckHandleProgress starts a goroutine that prints every item received on
// progress. The returned channel is closed once progress has been closed and
// every buffered item has been printed, so callers can wait for all output.
func fsckHandleProgress(progress chan interface{}) <-chan struct{} {
	done := make(chan struct{})
	go func() {
		defer close(done)
		for item := range progress {
			cli.Println(item)
		}
	}()
	return done
}
// Docs returns the long-form documentation for fsck. None has been written
// yet, so nil is returned instead of panicking: Docs is part of the
// cli.Command interface and may be called by generic tooling that walks every
// registered command.
//
// NOTE(review): assumes callers treat a nil *cli.CommandDocumentation as
// "no documentation available" — confirm against the help/doc generators.
func (cmd FsckCmd) Docs() *cli.CommandDocumentation {
	// TODO: write real documentation for fsck.
	return nil
}
// ArgParser returns a freshly allocated, zero-valued argument parser. No
// options or positional arguments are registered on it.
func (cmd FsckCmd) ArgParser() *argparser.ArgParser {
	parser := new(argparser.ArgParser)
	return parser
}
// Name returns the command's name, "fsck".
func (cmd FsckCmd) Name() string {
	const commandName = "fsck"
	return commandName
}

View File

@@ -109,6 +109,7 @@ var doltSubCommands = []cli.Command{
indexcmds.Commands,
commands.ReadTablesCmd{},
commands.GarbageCollectionCmd{},
commands.FsckCmd{},
commands.FilterBranchCmd{},
commands.MergeBaseCmd{},
commands.RootsCmd{},
@@ -151,6 +152,7 @@ var commandsWithoutCliCtx = []cli.Command{
&commands.Assist{},
commands.ProfileCmd{},
commands.ArchiveCmd{},
commands.FsckCmd{},
}
var commandsWithoutGlobalArgSupport = []cli.Command{

View File

@@ -2045,3 +2045,39 @@ func (ddb *DoltDB) GetStashRootAndHeadCommitAtIdx(ctx context.Context, idx int)
func (ddb *DoltDB) PersistGhostCommits(ctx context.Context, ghostCommits hash.HashSet) error {
	// Forward the set unchanged to PersistGhostCommitIDs on the underlying
	// Database and hand its result straight back to the caller.
	err := ddb.db.Database.PersistGhostCommitIDs(ctx, ghostCommits)
	return err
}
// FSCK verifies every chunk in a local database.
//
// Chunk hashes are enumerated from the old generation and then the new
// generation of the store. Each chunk is read back through the chunk store and
// checked twice: the chunk must report the address it was requested by, and
// the hash of its raw bytes must equal that address. For every chunk that
// passes, the string "OK: <hash>" is sent on progress.
//
// Verification stops at the first failure and that error is returned. An error
// is also returned when the database's chunk store is not a
// *nbs.GenerationalNBS, or when ctx is cancelled while a progress item is
// waiting to be delivered. FSCK never closes progress; the caller owns it and
// must keep receiving from it while FSCK runs.
func (ddb *DoltDB) FSCK(ctx context.Context, progress chan interface{}) error {
	cs := datas.ChunkStoreFromDatabase(ddb.db)

	gs, ok := cs.(*nbs.GenerationalNBS)
	if !ok {
		return errors.New("fsck command requires a local database")
	}

	hashChan := make(chan hash.Hash)
	go func() {
		defer close(hashChan)
		gs.OldGen().GetChunkHashes(ctx, hashChan)
		gs.NewGen().GetChunkHashes(ctx, hashChan)
	}()
	// hashChan is unbuffered, so returning early would leave the producer
	// goroutine blocked on a send forever. Drain whatever is left on the way
	// out so it can run to completion. After a full pass the channel is already
	// closed and this loop exits immediately.
	defer func() {
		for range hashChan {
		}
	}()

	for h := range hashChan {
		chk, err := cs.Get(ctx, h)
		if err != nil {
			return err
		}

		if chk.Hash() != h {
			return fmt.Errorf("Chunk: %s read with incorrect ID: %s", h.String(), chk.Hash().String())
		}

		// Recompute the address from the bytes actually stored; a mismatch
		// means the stored data does not belong to this address.
		calcChkSum := hash.Of(chk.Data())
		if chk.Hash() != calcChkSum {
			return fmt.Errorf("Chunk: %s read with incorrect checksum: %s", h.String(), calcChkSum.String())
		}

		// Do not block forever on a slow or absent progress consumer once the
		// caller has given up.
		select {
		case progress <- "OK: " + h.String():
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}

View File

@@ -186,6 +186,9 @@ type ChunkStoreGarbageCollector interface {
// interactions with generational stores. See ValueStore and
// NomsBlockStore/GenerationalNBS for details.
MarkAndSweepChunks(ctx context.Context, hashes <-chan []hash.Hash, dest ChunkStore) error
// GetChunkHashes sends the hash of every chunk in the store to the given channel. NM4.
GetChunkHashes(context.Context, chan hash.Hash)
}
type PrefixChunkStore interface {

View File

@@ -382,6 +382,11 @@ LOOP:
return nil
}
// GetChunkHashes is not implemented for MemoryStoreView yet: every call
// panics, and nothing is ever sent on hashes.
func (ms *MemoryStoreView) GetChunkHashes(ctx context.Context, hashes chan hash.Hash) {
	// TODO: implement me — enumerate this view's chunk hashes instead of panicking.
	panic("implement me")
}
// Stats always returns nil; MemoryStoreView reports no statistics.
func (ms *MemoryStoreView) Stats() interface{} {
	return nil
}

View File

@@ -99,6 +99,11 @@ func (s *TestStoreView) MarkAndSweepChunks(ctx context.Context, hashes <-chan []
return collector.MarkAndSweepChunks(ctx, hashes, collector)
}
// GetChunkHashes is not implemented for TestStoreView yet: every call panics,
// and nothing is ever sent on hashes.
func (s *TestStoreView) GetChunkHashes(ctx context.Context, hashes chan hash.Hash) {
	// NM4 implement me — TODO: provide a real implementation instead of panicking.
	panic("implement me")
}
func (s *TestStoreView) Reads() int {
reads := atomic.LoadInt32(&s.reads)
return int(reads)

View File

@@ -152,3 +152,8 @@ func (acs archiveChunkSource) getRecordRanges(_ context.Context, _ []getRecord)
func (acs archiveChunkSource) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) {
	// This operation is unsupported on archive chunk sources: every call fails
	// with the same error and none of the arguments are consulted.
	errUnsupported := errors.New("Archive chunk source does not support getManyCompressed")
	return false, errUnsupported
}
// getAllChunkHashes is not implemented for archive chunk sources yet: every
// call panics and both arguments are ignored.
func (acs archiveChunkSource) getAllChunkHashes(_ context.Context, _ chan hash.Hash) {
	// TODO: implement me — enumerate the archive's chunk hashes instead of panicking.
	panic("implement me")
}

View File

@@ -53,3 +53,8 @@ func (csa chunkSourceAdapter) clone() (chunkSource, error) {
}
return &chunkSourceAdapter{tr, csa.h}, nil
}
// getAllChunkHashes is not implemented for chunkSourceAdapter yet: every call
// panics and both arguments are ignored.
func (csa chunkSourceAdapter) getAllChunkHashes(_ context.Context, _ chan hash.Hash) {
	// TODO: implement me — enumerate the adapted source's chunk hashes instead of panicking.
	panic("implement me")
}

View File

@@ -93,3 +93,8 @@ func (ecs emptyChunkSource) close() error {
func (ecs emptyChunkSource) clone() (chunkSource, error) {
	// The receiver value is returned as the clone, and this never fails.
	return ecs, nil
}
// getAllChunkHashes sends nothing and returns immediately. An empty chunk
// source has no chunks to report, so enumerating it is a no-op, not a
// programming error; panicking here would abort any store-wide hash
// enumeration that happens to include an empty source.
func (ecs emptyChunkSource) getAllChunkHashes(_ context.Context, _ chan hash.Hash) {
}

View File

@@ -169,6 +169,17 @@ func (ftr *fileTableReader) hash() hash.Hash {
return ftr.h
}
// getAllChunkHashes walks the table index in ordinal order and sends the hash
// of each entry on out. The channel is not closed here. The context argument
// is ignored, so each send blocks until a receiver takes the value.
//
// An error from the index lookup cannot be returned through this signature and
// is raised as a panic instead (NM4).
func (ftr *fileTableReader) getAllChunkHashes(_ context.Context, out chan hash.Hash) {
	var ord uint32
	for ord < ftr.idx.chunkCount() {
		var addr hash.Hash
		if _, err := ftr.idx.indexEntry(ord, &addr); err != nil {
			panic(err) // NM4
		}
		out <- addr
		ord++
	}
}
// Close closes the embedded table reader and returns whatever error that
// produces.
func (ftr *fileTableReader) Close() error {
	err := ftr.tableReader.close()
	return err
}

View File

@@ -209,6 +209,11 @@ func (s journalChunkSource) close() error {
return nil
}
// getAllChunkHashes currently reports nothing for journal chunk sources: it
// returns immediately without sending on the channel, so any chunks held in a
// journal are skipped by callers that enumerate hashes.
func (s journalChunkSource) getAllChunkHashes(_ context.Context, _ chan hash.Hash) {
	// NM4 - figure out journals!
}
func equalSpecs(left, right []tableSpec) bool {
if len(left) != len(right) {
return false

View File

@@ -83,6 +83,11 @@ func (nbsMW *NBSMetricWrapper) MarkAndSweepChunks(ctx context.Context, hashes <-
return nbsMW.nbs.MarkAndSweepChunks(ctx, hashes, dest)
}
// GetChunkHashes forwards to the wrapped store's GetChunkHashes, in the same
// way the wrapper's other methods delegate to nbsMW.nbs. The wrapper adds no
// behavior of its own here and does not close hashes.
func (nbsMW *NBSMetricWrapper) GetChunkHashes(ctx context.Context, hashes chan hash.Hash) {
	nbsMW.nbs.GetChunkHashes(ctx, hashes)
}
// PruneTableFiles deletes old table files that are no longer referenced in the manifest.
func (nbsMW *NBSMetricWrapper) PruneTableFiles(ctx context.Context) error {
return nbsMW.nbs.PruneTableFiles(ctx)

View File

@@ -1667,6 +1667,16 @@ LOOP:
return gcc.copyTablesToDir(ctx, tfp)
}
// GetChunkHashes sends the hash of every chunk in the store's novel tables,
// then its upstream tables, on hashes. The channel is not closed here, so a
// caller may enumerate several stores into the same channel.
func (nbs *NomsBlockStore) GetChunkHashes(ctx context.Context, hashes chan hash.Hash) {
	// nbs.tables is replaced under nbs.mu (see swapTables), so take the
	// snapshot under the same lock. The lock is released before enumerating
	// because the sends on hashes can block for as long as the receiver likes.
	// NOTE(review): assumes a table set is not mutated in place after it has
	// been published on nbs.tables — confirm.
	nbs.mu.Lock()
	tables := nbs.tables
	nbs.mu.Unlock()

	for _, src := range tables.novel {
		src.getAllChunkHashes(ctx, hashes)
	}
	for _, src := range tables.upstream {
		src.getAllChunkHashes(ctx, hashes)
	}
}
func (nbs *NomsBlockStore) swapTables(ctx context.Context, specs []tableSpec) (err error) {
nbs.mu.Lock()
defer nbs.mu.Unlock()

View File

@@ -240,6 +240,9 @@ type chunkSource interface {
// currentSize returns the current total physical size of the chunkSource.
currentSize() uint64
// getAllChunkHashes sends the hash of every chunk in this source to the given channel. NM4 - I dunno. Maybe?
getAllChunkHashes(context.Context, chan hash.Hash)
}
type chunkSources []chunkSource

View File

@@ -0,0 +1,9 @@
This directory contains a set of corrupt databases that are used to test the behavior of fsck,
and perhaps other tools in the future. Please catalog the contents of each database here.
Each test directory holds the contents of that database's `.dolt/` directory (.dolt/* -> test-dir).
## bad_commit
This database contains a commit (rlmgv0komq0oj7qu4osdo759vs4c5pvg) whose stored contents do not match
its address (the data actually in the table file hashes to gpphmuvegiedtjtbfku4ru8jalfdk21u).
This simulates a malicious update to the author of that commit.

View File

@@ -0,0 +1 @@
5:__DOLT__:pfculecuhmg18r2v4cf52p6lnp9r4rrk:vq4ej4fr5vhmrmbmc7f4bsh7ieg7rro8:pfculecuhmg18r2v4cf52p6lnp9r4rrk:9tj851gpv71861ln5k6uutfot66u79r4:2

View File

@@ -0,0 +1 @@
5:__DOLT__:qld52734cm7e9efc91v55nb7601pkid5:00000000000000000000000000000000:00000000000000000000000000000000:aa3a08lf6sbtq9cl58mledifrqvt3fid:15

View File

@@ -0,0 +1,6 @@
{
"head": "refs/heads/main",
"remotes": {},
"backups": {},
"branches": {}
}

View File

@@ -0,0 +1,22 @@
#! /usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# bats per-test setup hook. Calls setup_no_dolt_init (presumably defined in the
# loaded helper/common.bash — verify).
setup() {
setup_no_dolt_init
}
# bats per-test teardown hook. Calls teardown_common (presumably defined in the
# loaded helper/common.bash — verify).
teardown() {
teardown_common
}
# Checks that fsck exits with status 1 and names the corrupt chunk, together
# with the checksum actually computed, when run against the bad_commit fixture.
@test "fsck" {
    mkdir ".dolt"
    # Copy the fixture's contents, not the directory itself, into .dolt. The
    # "/." suffix behaves the same on GNU and BSD cp, whereas a bare trailing
    # slash on the source makes GNU cp create .dolt/bad_commit instead.
    cp -R "$BATS_TEST_DIRNAME/corrupt_dbs/bad_commit/." .dolt/

    # Not wrapped in `run`, so a non-zero exit here fails the test immediately.
    dolt status

    run dolt fsck
    [ "$status" -eq 1 ]
    [[ "$output" =~ "rlmgv0komq0oj7qu4osdo759vs4c5pvg read with incorrect checksum: gpphmuvegiedtjtbfku4ru8jalfdk21u" ]]
}

View File

@@ -139,6 +139,7 @@ SKIP_SERVER_TESTS=$(cat <<-EOM
~rebase.bats~
~shallow-clone.bats~
~archive.bats~
~fsck.bats~
EOM
)