Diff summary includes cells modified

This commit is contained in:
Taylor Bantle
2019-10-01 14:19:47 -07:00
parent 93f07b64fc
commit 0b43e06e2e
4 changed files with 204 additions and 67 deletions
+89 -33
View File
@@ -14,7 +14,7 @@ teardown() {
rm -rf "$BATS_TMPDIR/dolt-repo-$$"
}
@test "diff summary works" {
@test "diff summary comparing working table to last commit" {
dolt table create -s=`batshelper 1pk5col-ints.schema` test
dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0
dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1
@@ -24,47 +24,103 @@ teardown() {
dolt table put-row test pk:3 c1:11 c2:0 c3:0 c4:0 c5:0
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "2 Rows Unmodified" ]] || false
[[ "$output" =~ "2 Rows Added" ]] || false
[[ "$output" =~ "0 Rows Deleted" ]] || false
[[ "$output" =~ "0 Rows Modified" ]] || false
[[ "$output" =~ "(2 entries vs 4 entries)" ]] || false
[[ "$output" =~ "2 Rows Unmodified (100.00%)" ]] || false
[[ "$output" =~ "2 Rows Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(2 Entries vs 4 Entries)" ]] || false
dolt add test
dolt commit -m "added two rows"
dolt table put-row test pk:0 c1:11 c2:0 c3:0 c4:0 c5:0
dolt table put-row test pk:0 c1:11 c2:0 c3:0 c4:0 c5:6
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified" ]] || false
[[ "$output" =~ "0 Rows Added" ]] || false
[[ "$output" =~ "0 Rows Deleted" ]] || false
[[ "$output" =~ "1 Row Modified" ]] || false
[[ "$output" =~ "(4 entries vs 4 entries)" ]] || false
[[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (25.00%)" ]] || false
[[ "$output" =~ "2 Cells Modified (8.33%)" ]] || false
[[ "$output" =~ "(4 Entries vs 4 Entries)" ]] || false
dolt add test
dolt commit -m "modified first row"
dolt table rm-row test 0
run dolt diff --summary
echo "OUTPUT = $output"
[ "$status" -eq 1 ]
[[ "$output" =~ "3 Rows Unmodified" ]] || false
[[ "$output" =~ "0 Rows Added" ]] || false
[[ "$output" =~ "1 Row Deleted" ]] || false
[[ "$output" =~ "0 Rows Modified" ]] || false
[[ "$output" =~ "(4 entries vs 3 entries)" ]] || false
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "1 Row Deleted (25.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(4 Entries vs 3 Entries)" ]] || false
}
@test "diff summary comparing commits" {
dolt checkout -b firstbranch
dolt table create -s=`batshelper 1pk5col-ints.schema` test
dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0
dolt add test
dolt commit -m "Added one row"
dolt checkout -b newbranch
dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1
dolt add test
dolt commit -m "Added another row"
run dolt diff --summary firstbranch newbranch
echo "OUTPUT = $output"
[ "$status" -eq 1 ]
}
@test "diff summary comparing two branches" {
dolt checkout -b firstbranch
dolt table create -s=`batshelper 1pk5col-ints.schema` test
dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0
dolt add test
dolt commit -m "Added one row"
dolt checkout -b newbranch
dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1
dolt add test
dolt commit -m "Added another row"
run dolt diff --summary newbranch firstbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Entry vs 2 Entries)" ]] || false
}
@test "diff summary shows correct changes after schema change" {
dolt table import -c -s=`batshelper employees-sch.json` employees `batshelper employees-tbl.json`
dolt add employees
dolt commit -m "Added employees table with data"
dolt schema --add-column employees city string
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "No data changes. See schema changes by using -s or --schema." ]] || false
dolt table put-row employees id:3 "first name":taylor "last name":bantle title:"software engineer" "start date":"" "end date":"" city:"Santa Monica"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (33.33%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(3 Entries vs 4 Entries)" ]] || false
dolt table put-row employees id:0 "first name":tim "last name":sehn title:ceo "start date":"2 years ago" "end date":"" city:"Santa Monica"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "2 Rows Unmodified (66.67%)" ]] || false
[[ "$output" =~ "1 Row Added (33.33%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (33.33%)" ]] || false
[[ "$output" =~ "2 Cells Modified (11.11%)" ]] || false
[[ "$output" =~ "(3 Entries vs 4 Entries)" ]] || false
}
@test "diff summary gets summaries for all tables with changes" {
dolt table create -s=`batshelper 1pk5col-ints.schema` test
dolt table put-row test pk:0 c1:0 c2:0 c3:0 c4:0 c5:0
dolt table put-row test pk:1 c1:1 c2:1 c3:1 c4:1 c5:1
dolt table create -s=`batshelper employees-sch.json` employees
dolt table put-row employees id:0 "first name":tim "last name":sehn title:ceo "start date":"" "end date":""
dolt add test employees
dolt commit -m "test tables created"
dolt table put-row test pk:2 c1:11 c2:0 c3:0 c4:0 c5:0
dolt table put-row employees id:1 "first name":brian "last name":hendriks title:founder "start date":"" "end date":""
run dolt diff --summary
echo "$output"
[ "$status" -eq 0 ]
[[ "$output" =~ "diff --dolt a/test b/test" ]] || false
[[ "$output" =~ "--- a/test @" ]] || false
[[ "$output" =~ "+++ b/test @" ]] || false
[[ "$output" =~ "diff --dolt a/employees b/employees" ]] || false
[[ "$output" =~ "--- a/employees @" ]] || false
[[ "$output" =~ "+++ b/employees @" ]] || false
}
+2 -1
View File
@@ -302,7 +302,8 @@ func diffRoots(ctx context.Context, r1, r2 *doltdb.RootValue, tblNames []string,
var verr errhand.VerboseError
if summary {
verr = diff.Summary(ctx, rowData2, rowData1)
colLen := sch2.GetAllCols().Size()
verr = diff.Summary(ctx, rowData1, rowData2, colLen)
}
if diffParts&SchemaOnlyDiff != 0 && sch1Hash != sch2Hash && !summary {
+73 -33
View File
@@ -24,6 +24,7 @@ import (
"github.com/liquidata-inc/dolt/go/cmd/dolt/cli"
"github.com/liquidata-inc/dolt/go/cmd/dolt/errhand"
"github.com/liquidata-inc/dolt/go/store/atomicerr"
"github.com/liquidata-inc/dolt/go/store/datas"
"github.com/liquidata-inc/dolt/go/store/diff"
"github.com/liquidata-inc/dolt/go/store/types"
"github.com/liquidata-inc/dolt/go/store/util/status"
@@ -31,41 +32,41 @@ import (
type diffFunc func(changeChan chan<- types.ValueChanged, stopChan <-chan struct{})
func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError {
// if is1, err := datas.IsCommit(v1); err != nil {
// return errhand.BuildDError("").AddCause(err).Build()
// } else if is1 {
// if is2, err := datas.IsCommit(v2); err != nil {
// return errhand.BuildDError("").AddCause(err).Build()
// } else if is2 {
// cli.Println("Comparing commit values")
// Summary prints a summary of the diff between two values
func Summary(ctx context.Context, v1, v2 types.Value, colLen int) errhand.VerboseError {
if is1, err := datas.IsCommit(v1); err != nil {
return errhand.BuildDError("").AddCause(err).Build()
} else if is1 {
if is2, err := datas.IsCommit(v2); err != nil {
return errhand.BuildDError("").AddCause(err).Build()
} else if is2 {
cli.Println("Comparing commit values")
// var err error
// v1, _, err = v1.(types.Struct).MaybeGet(datas.ValueField)
// if err != nil {
// return errhand.BuildDError("").AddCause(err).Build()
// }
var err error
v1, _, err = v1.(types.Struct).MaybeGet(datas.ValueField)
if err != nil {
return errhand.BuildDError("").AddCause(err).Build()
}
// v2, _, err = v2.(types.Struct).MaybeGet(datas.ValueField)
// if err != nil {
// return errhand.BuildDError("").AddCause(err).Build()
// }
// }
// }
v2, _, err = v2.(types.Struct).MaybeGet(datas.ValueField)
if err != nil {
return errhand.BuildDError("").AddCause(err).Build()
}
}
}
// will values ever not be MapKind?
var singular, plural string
if v1.Kind() == v2.Kind() {
switch v1.Kind() {
case types.StructKind:
singular = "field"
plural = "fields"
singular = "Field"
plural = "Fields"
case types.MapKind:
singular = "entry"
plural = "entries"
singular = "Entry"
plural = "Entries"
default:
singular = "value"
plural = "values"
singular = "Value"
plural = "Values"
}
}
@@ -80,7 +81,7 @@ func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError {
rp.Store(r)
}
}()
verr = diffSummary(ctx, ae, ch, v1, v2)
verr = diffSummary(ctx, ae, ch, v2, v1)
}()
if verr != nil {
@@ -96,6 +97,7 @@ func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError {
acc.Adds += p.Adds
acc.Removes += p.Removes
acc.Changes += p.Changes
acc.CellChanges += p.CellChanges
acc.NewSize += p.NewSize
acc.OldSize += p.OldSize
}
@@ -108,15 +110,18 @@ func Summary(ctx context.Context, v1, v2 types.Value) errhand.VerboseError {
err := fmt.Errorf("panic occured during closing: %v", r)
return errhand.BuildDError("").AddCause(err).Build()
}
formatStatus(acc, singular, plural)
if acc.NewSize > 0 || acc.OldSize > 0 {
formatStatus(acc, singular, plural, colLen)
} else {
cli.Println("No data changes. See schema changes by using -s or --schema.")
}
status.Done()
return nil
}
type diffSummaryProgress struct {
Adds, Removes, Changes, NewSize, OldSize uint64
Adds, Removes, Changes, CellChanges, NewSize, OldSize uint64
}
func diffSummary(ctx context.Context, ae *atomicerr.AtomicError, ch chan diffSummaryProgress, v1, v2 types.Value) errhand.VerboseError {
@@ -245,7 +250,10 @@ func diffSummaryValueChanged(ae *atomicerr.AtomicError, ch chan<- diffSummaryPro
}()
f(changeChan, stopChan)
}()
reportChanges(ch, changeChan)
verr := reportChanges(ch, changeChan)
if verr != nil {
return verr
}
if r := rp.Load(); r != nil {
err := fmt.Errorf("panic occured during closing: %v", r)
return errhand.BuildDError("").AddCause(err).Build()
@@ -254,6 +262,7 @@ func diffSummaryValueChanged(ae *atomicerr.AtomicError, ch chan<- diffSummaryPro
}
func reportChanges(ch chan<- diffSummaryProgress, changeChan chan types.ValueChanged) errhand.VerboseError {
var verr errhand.VerboseError
for change := range changeChan {
switch change.ChangeType {
case types.DiffChangeAdded:
@@ -262,14 +271,42 @@ func reportChanges(ch chan<- diffSummaryProgress, changeChan chan types.ValueCha
ch <- diffSummaryProgress{Removes: 1}
case types.DiffChangeModified:
ch <- diffSummaryProgress{Changes: 1}
cellChanges, err := getCellChanges(change.NewValue, change.OldValue, change.Key)
if err != nil {
verr = errhand.BuildDError("").AddCause(err).Build()
}
ch <- diffSummaryProgress{CellChanges: cellChanges}
default:
return errhand.BuildDError("unknown change type").Build()
}
}
if verr != nil {
return verr
}
return nil
}
func formatStatus(acc diffSummaryProgress, singular, plural string) {
func getCellChanges(oldVal, newVal, key types.Value) (uint64, error) {
var cellsChanged uint64
var err error
oldTuple := oldVal.(types.Tuple)
newTuple := newVal.(types.Tuple)
if oldTuple.Len() > newTuple.Len() {
cellsChanged, err = oldTuple.CountDifferencesBetweenTupleFields(newTuple)
} else {
cellsChanged, err = newTuple.CountDifferencesBetweenTupleFields(oldTuple)
}
if err != nil {
return 0, err
}
return cellsChanged, nil
}
func formatStatus(acc diffSummaryProgress, singular, plural string, colLen int) {
pluralize := func(singular, plural string, n uint64) string {
var noun string
if n != 1 {
@@ -285,14 +322,17 @@ func formatStatus(acc diffSummaryProgress, singular, plural string) {
insertions := pluralize("Row Added", "Rows Added", acc.Adds)
deletions := pluralize("Row Deleted", "Rows Deleted", acc.Removes)
changes := pluralize("Row Modified", "Rows Modified", acc.Changes)
// cellChanges := pluralize("cell modified", "cells modified", acc.CellsModified)
cellChanges := pluralize("Cell Modified", "Cells Modified", acc.CellChanges)
oldValues := pluralize(singular, plural, acc.OldSize)
newValues := pluralize(singular, plural, acc.NewSize)
percentCellsChanged := float64(100*acc.CellChanges) / (float64(acc.OldSize) * float64(colLen))
cli.Printf("%s (%.2f%%)\n", unmodified, (float64(100*rowsUnmodified) / float64(acc.OldSize)))
cli.Printf("%s (%.2f%%)\n", insertions, (float64(100*acc.Adds) / float64(acc.OldSize)))
cli.Printf("%s (%.2f%%)\n", deletions, (float64(100*acc.Removes) / float64(acc.OldSize)))
cli.Printf("%s (%.2f%%)\n", changes, (float64(100*acc.Changes) / float64(acc.OldSize)))
cli.Printf("%s (%.2f%%)\n", cellChanges, percentCellsChanged)
cli.Printf("(%s vs %s)\n", oldValues, newValues)
}
+40
View File
@@ -443,3 +443,43 @@ func (t Tuple) Less(nbf *NomsBinFormat, other LesserValuable) (bool, error) {
return TupleKind < other.Kind(), nil
}
// CountDifferencesBetweenTupleFields returns the number of fields that are different between two
// tuples and does not panic if tuples are different lengths.
func (t Tuple) CountDifferencesBetweenTupleFields(other Tuple) (uint64, error) {
changed := 0
err := t.IterFields(func(index uint64, val Value) (stop bool, err error) {
dec, count := other.decoderSkipToFields()
// Prevents comparing column tags
if index%2 == 1 {
if index >= count {
changed++
return false, nil
} else {
for i := uint64(0); i < index; i++ {
err := dec.skipValue(other.format())
if err != nil {
return true, err
}
}
otherVal, err := dec.readValue(other.format())
if err != nil {
return true, err
}
if !otherVal.Equals(val) {
changed++
}
}
}
return false, nil
})
if err != nil {
return 0, err
}
return uint64(changed), nil
}