From 0b43e06e2ef996ea5e3e1b9990352ee890edfca5 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Tue, 1 Oct 2019 14:19:47 -0700 Subject: [PATCH] Diff summary includes cells modified --- bats/diff.bats | 122 +++++++++++++++------ go/cmd/dolt/commands/diff.go | 3 +- go/libraries/doltcore/diff/diff_summary.go | 106 ++++++++++++------ go/store/types/tuple.go | 40 +++++++ 4 files changed, 204 insertions(+), 67 deletions(-) diff --git a/bats/diff.bats b/bats/diff.bats index 167610abc5..61b3b0f1a9 100644 --- a/bats/diff.bats +++ b/bats/diff.bats @@ -14,7 +14,7 @@ teardown() { rm -rf "$BATS_TMPDIR/dolt-repo-$$" } -@test "diff summary works" { +@test "diff summary comparing working table to last commit" { dolt table create -s=`batshelper 1pk5col-ints.schema` test dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0 dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1 @@ -24,47 +24,103 @@ teardown() { dolt table put-row test pk:3 c1:11 c2:0 c3:0 c4:0 c5:0 run dolt diff --summary [ "$status" -eq 0 ] - [[ "$output" =~ "2 Rows Unmodified" ]] || false - [[ "$output" =~ "2 Rows Added" ]] || false - [[ "$output" =~ "0 Rows Deleted" ]] || false - [[ "$output" =~ "0 Rows Modified" ]] || false - [[ "$output" =~ "(2 entries vs 4 entries)" ]] || false + [[ "$output" =~ "2 Rows Unmodified (100.00%)" ]] || false + [[ "$output" =~ "2 Rows Added (100.00%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false + [[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false + [[ "$output" =~ "(2 Entries vs 4 Entries)" ]] || false dolt add test dolt commit -m "added two rows" - dolt table put-row test pk:0 c1:11 c2:0 c3:0 c4:0 c5:0 + dolt table put-row test pk:0 c1:11 c2:0 c3:0 c4:0 c5:6 run dolt diff --summary [ "$status" -eq 0 ] - [[ "$output" =~ "3 Rows Unmodified" ]] || false - [[ "$output" =~ "0 Rows Added" ]] || false - [[ "$output" =~ "0 Rows Deleted" ]] || false - [[ "$output" =~ "1 Row Modified" ]] || false - [[ "$output" =~ "(4 entries vs 4 entries)" ]] || false + [[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false + [[ "$output" =~ "0 Rows Added (0.00%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "1 Row Modified (25.00%)" ]] || false + [[ "$output" =~ "2 Cells Modified (8.33%)" ]] || false + [[ "$output" =~ "(4 Entries vs 4 Entries)" ]] || false dolt add test dolt commit -m "modified first row" dolt table rm-row test 0 run dolt diff --summary - echo "OUTPUT = $output" - [ "$status" -eq 1 ] - [[ "$output" =~ "3 Rows Unmodified" ]] || false - [[ "$output" =~ "0 Rows Added" ]] || false - [[ "$output" =~ "1 Row Deleted" ]] || false - [[ "$output" =~ "0 Rows Modified" ]] || false - [[ "$output" =~ "(4 entries vs 3 entries)" ]] || false + [ "$status" -eq 0 ] + [[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false + [[ "$output" =~ "0 Rows Added (0.00%)" ]] || false + [[ "$output" =~ "1 Row Deleted (25.00%)" ]] || false + [[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false + [[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false + [[ "$output" =~ "(4 Entries vs 3 Entries)" ]] || false } -@test "diff summary comparing commits" { - dolt checkout -b firstbranch - dolt table create -s=`batshelper 1pk5col-ints.schema` test - dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0 - dolt add test - dolt commit -m "Added one row" - dolt checkout -b newbranch - dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1 - dolt add test - dolt commit -m "Added another row" - run dolt diff --summary firstbranch newbranch - echo "OUTPUT = $output" - [ "$status" -eq 1 ] -} \ No newline at end of file +@test "diff summary comparing two branches" { + dolt checkout -b firstbranch + dolt table create -s=`batshelper 1pk5col-ints.schema` test + dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0 + dolt add test + dolt commit -m "Added one row" + dolt checkout -b newbranch + dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1 + dolt add test + dolt commit -m "Added another row" + run dolt diff --summary newbranch firstbranch + [ "$status" -eq 0 ] + [[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false + [[ "$output" =~ "1 Row Added (100.00%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false + [[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false + [[ "$output" =~ "(1 Entry vs 2 Entries)" ]] || false +} + +@test "diff summary shows correct changes after schema change" { + dolt table import -c -s=`batshelper employees-sch.json` employees `batshelper employees-tbl.json` + dolt add employees + dolt commit -m "Added employees table with data" + dolt schema --add-column employees city string + run dolt diff --summary + [ "$status" -eq 0 ] + [[ "$output" =~ "No data changes. See schema changes by using -s or --schema." ]] || false + dolt table put-row employees id:3 "first name":taylor "last name":bantle title:"software engineer" "start date":"" "end date":"" city:"Santa Monica" + run dolt diff --summary + [ "$status" -eq 0 ] + [[ "$output" =~ "3 Rows Unmodified (100.00%)" ]] || false + [[ "$output" =~ "1 Row Added (33.33%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false + [[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false + [[ "$output" =~ "(3 Entries vs 4 Entries)" ]] || false + dolt table put-row employees id:0 "first name":tim "last name":sehn title:ceo "start date":"2 years ago" "end date":"" city:"Santa Monica" + run dolt diff --summary + [ "$status" -eq 0 ] + [[ "$output" =~ "2 Rows Unmodified (66.67%)" ]] || false + [[ "$output" =~ "1 Row Added (33.33%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "1 Row Modified (33.33%)" ]] || false + [[ "$output" =~ "2 Cells Modified (11.11%)" ]] || false + [[ "$output" =~ "(3 Entries vs 4 Entries)" ]] || false +} + +@test "diff summary gets summaries for all tables with changes" { + dolt table create -s=`batshelper 1pk5col-ints.schema` test + dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0 + dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1 + dolt table create -s=`batshelper employees-sch.json` employees + dolt table put-row employees id:0 "first name":tim "last name":sehn title:ceo "start date":"" "end date":"" + dolt add test employees + dolt commit -m "test tables created" + dolt table put-row test pk:2 c1:11 c2:0 c3:0 c4:0 c5:0 + dolt table put-row employees id:1 "first name":brian "last name":hendriks title:founder "start date":"" "end date":"" + run dolt diff --summary + echo "$output" + [ "$status" -eq 0 ] + [[ "$output" =~ "diff --dolt a/test b/test" ]] || false + [[ "$output" =~ "--- a/test @" ]] || false + [[ "$output" =~ "+++ b/test @" ]] || false + [[ "$output" =~ "diff --dolt a/employees b/employees" ]] || false + [[ "$output" =~ "--- a/employees @" ]] || false + [[ "$output" =~ "+++ b/employees @" ]] || false +} diff --git a/go/cmd/dolt/commands/diff.go b/go/cmd/dolt/commands/diff.go index bc28612267..f643966d40 100644 --- a/go/cmd/dolt/commands/diff.go +++ b/go/cmd/dolt/commands/diff.go @@ -302,7 +302,8 @@ func diffRoots(ctx context.Context, r1, r2 *doltdb.RootValue, tblNames []string, var verr errhand.VerboseError if summary { - verr = diff.Summary(ctx, rowData2, rowData1) + colLen := sch2.GetAllCols().Size() + verr = diff.Summary(ctx, rowData1, rowData2, colLen) } if diffParts&SchemaOnlyDiff != 0 && sch1Hash != sch2Hash && !summary { diff --git a/go/libraries/doltcore/diff/diff_summary.go b/go/libraries/doltcore/diff/diff_summary.go index 26e09bc238..3ec3ee1f5b 100644 --- a/go/libraries/doltcore/diff/diff_summary.go +++ b/go/libraries/doltcore/diff/diff_summary.go @@ -24,6 +24,7 @@ import ( "github.com/liquidata-inc/dolt/go/cmd/dolt/cli" "github.com/liquidata-inc/dolt/go/cmd/dolt/errhand" "github.com/liquidata-inc/dolt/go/store/atomicerr" + "github.com/liquidata-inc/dolt/go/store/datas" "github.com/liquidata-inc/dolt/go/store/diff" "github.com/liquidata-inc/dolt/go/store/types" "github.com/liquidata-inc/dolt/go/store/util/status" @@ -31,41 +32,41 @@ import ( type diffFunc func(changeChan chan<- types.ValueChanged, stopChan <-chan struct{}) -func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError { - // if is1, err := datas.IsCommit(v1); err != nil { - // return errhand.BuildDError("").AddCause(err).Build() - // } else if is1 { - // if is2, err := datas.IsCommit(v2); err != nil { - // return errhand.BuildDError("").AddCause(err).Build() - // } else if is2 { - // cli.Println("Comparing commit values") +// Summary prints a summary of the diff between two values +func Summary(ctx context.Context, v1, v2 types.Value, colLen int) errhand.VerboseError { + if is1, err := datas.IsCommit(v1); err != nil { + return errhand.BuildDError("").AddCause(err).Build() + } else if is1 { + if is2, err := datas.IsCommit(v2); err != nil { + return errhand.BuildDError("").AddCause(err).Build() + } else if is2 { + cli.Println("Comparing commit values") - // var err error - // v1, _, err = v1.(types.Struct).MaybeGet(datas.ValueField) - // if err != nil { - // return errhand.BuildDError("").AddCause(err).Build() - // } + var err error + v1, _, err = v1.(types.Struct).MaybeGet(datas.ValueField) + if err != nil { + return errhand.BuildDError("").AddCause(err).Build() + } - // v2, _, err = v2.(types.Struct).MaybeGet(datas.ValueField) - // if err != nil { - // return errhand.BuildDError("").AddCause(err).Build() - // } - // } - // } + v2, _, err = v2.(types.Struct).MaybeGet(datas.ValueField) + if err != nil { + return errhand.BuildDError("").AddCause(err).Build() + } + } + } - // will values ever not be MapKind? var singular, plural string if v1.Kind() == v2.Kind() { switch v1.Kind() { case types.StructKind: - singular = "field" - plural = "fields" + singular = "Field" + plural = "Fields" case types.MapKind: - singular = "entry" - plural = "entries" + singular = "Entry" + plural = "Entries" default: - singular = "value" - plural = "values" + singular = "Value" + plural = "Values" } } @@ -80,7 +81,7 @@ func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError { rp.Store(r) } }() - verr = diffSummary(ctx, ae, ch, v1, v2) + verr = diffSummary(ctx, ae, ch, v2, v1) }() if verr != nil { @@ -96,6 +97,7 @@ func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError { acc.Adds += p.Adds acc.Removes += p.Removes acc.Changes += p.Changes + acc.CellChanges += p.CellChanges acc.NewSize += p.NewSize acc.OldSize += p.OldSize } @@ -108,15 +110,18 @@ func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError { err := fmt.Errorf("panic occured during closing: %v", r) return errhand.BuildDError("").AddCause(err).Build() } - - formatStatus(acc, singular, plural) + if acc.NewSize > 0 || acc.OldSize > 0 { + formatStatus(acc, singular, plural, colLen) + } else { + cli.Println("No data changes. See schema changes by using -s or --schema.") + } status.Done() return nil } type diffSummaryProgress struct { - Adds, Removes, Changes, NewSize, OldSize uint64 + Adds, Removes, Changes, CellChanges, NewSize, OldSize uint64 } func diffSummary(ctx context.Context, ae *atomicerr.AtomicError, ch chan diffSummaryProgress, v1, v2 types.Value) errhand.VerboseError { @@ -245,7 +250,10 @@ func diffSummaryValueChanged(ae *atomicerr.AtomicError, ch chan<- diffSummaryPro }() f(changeChan, stopChan) }() - reportChanges(ch, changeChan) + verr := reportChanges(ch, changeChan) + if verr != nil { + return verr + } if r := rp.Load(); r != nil { err := fmt.Errorf("panic occured during closing: %v", r) return errhand.BuildDError("").AddCause(err).Build() @@ -254,6 +262,7 @@ func diffSummaryValueChanged(ae *atomicerr.AtomicError, ch chan<- diffSummaryPro } func reportChanges(ch chan<- diffSummaryProgress, changeChan chan types.ValueChanged) errhand.VerboseError { + var verr errhand.VerboseError for change := range changeChan { switch change.ChangeType { case types.DiffChangeAdded: @@ -262,14 +271,42 @@ func reportChanges(ch chan<- diffSummaryProgress, changeChan chan types.ValueCha ch <- diffSummaryProgress{Removes: 1} case types.DiffChangeModified: ch <- diffSummaryProgress{Changes: 1} + cellChanges, err := getCellChanges(change.NewValue, change.OldValue, change.Key) + if err != nil { + verr = errhand.BuildDError("").AddCause(err).Build() + } + ch <- diffSummaryProgress{CellChanges: cellChanges} default: return errhand.BuildDError("unknown change type").Build() } } + if verr != nil { + return verr + } return nil } -func formatStatus(acc diffSummaryProgress, singular, plural string) { +func getCellChanges(oldVal, newVal, key types.Value) (uint64, error) { + var cellsChanged uint64 + var err error + + oldTuple := oldVal.(types.Tuple) + newTuple := newVal.(types.Tuple) + + if oldTuple.Len() > newTuple.Len() { + cellsChanged, err = oldTuple.CountDifferencesBetweenTupleFields(newTuple) + } else { + cellsChanged, err = newTuple.CountDifferencesBetweenTupleFields(oldTuple) + } + + if err != nil { + return 0, err + } + + return cellsChanged, nil +} + +func formatStatus(acc diffSummaryProgress, singular, plural string, colLen int) { pluralize := func(singular, plural string, n uint64) string { var noun string if n != 1 { @@ -285,14 +322,17 @@ func formatStatus(acc diffSummaryProgress, singular, plural string) { insertions := pluralize("Row Added", "Rows Added", acc.Adds) deletions := pluralize("Row Deleted", "Rows Deleted", acc.Removes) changes := pluralize("Row Modified", "Rows Modified", acc.Changes) - // cellChanges := pluralize("cell modified", "cells modified", acc.CellsModified) + cellChanges := pluralize("Cell Modified", "Cells Modified", acc.CellChanges) oldValues := pluralize(singular, plural, acc.OldSize) newValues := pluralize(singular, plural, acc.NewSize) + percentCellsChanged := float64(100*acc.CellChanges) / (float64(acc.OldSize) * float64(colLen)) + cli.Printf("%s (%.2f%%)\n", unmodified, (float64(100*rowsUnmodified) / float64(acc.OldSize))) cli.Printf("%s (%.2f%%)\n", insertions, (float64(100*acc.Adds) / float64(acc.OldSize))) cli.Printf("%s (%.2f%%)\n", deletions, (float64(100*acc.Removes) / float64(acc.OldSize))) cli.Printf("%s (%.2f%%)\n", changes, (float64(100*acc.Changes) / float64(acc.OldSize))) + cli.Printf("%s (%.2f%%)\n", cellChanges, percentCellsChanged) cli.Printf("(%s vs %s)\n", oldValues, newValues) } diff --git a/go/store/types/tuple.go b/go/store/types/tuple.go index 387693bfe2..3f590c535c 100644 --- a/go/store/types/tuple.go +++ b/go/store/types/tuple.go @@ -443,3 +443,43 @@ func (t Tuple) Less(nbf *NomsBinFormat, other LesserValuable) (bool, error) { return TupleKind < other.Kind(), nil } + +// CountDifferencesBetweenTupleFields returns the number of fields that are different between two +// tuples and does not panic if tuples are different lengths. +func (t Tuple) CountDifferencesBetweenTupleFields(other Tuple) (uint64, error) { + changed := 0 + + err := t.IterFields(func(index uint64, val Value) (stop bool, err error) { + dec, count := other.decoderSkipToFields() + + // Prevents comparing column tags + if index%2 == 1 { + if index >= count { + changed++ + return false, nil + } else { + for i := uint64(0); i < index; i++ { + err := dec.skipValue(other.format()) + + if err != nil { + return true, err + } + } + otherVal, err := dec.readValue(other.format()) + if err != nil { + return true, err + } + if !otherVal.Equals(val) { + changed++ + } + } + } + + return false, nil + }) + + if err != nil { + return 0, err + } + return uint64(changed), nil +}